This is an automated email from the ASF dual-hosted git repository.
mehulbatra pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss.git
The following commit(s) were added to refs/heads/main by this push:
new be6fc3b19 [build] Introduce flink-quickstart docker file (#1759)
be6fc3b19 is described below
commit be6fc3b1971a513b3c6bd7f59e97c9ab1acbbd88
Author: yuxia Luo <[email protected]>
AuthorDate: Mon Oct 13 20:50:30 2025 +0800
[build] Introduce flink-quickstart docker file (#1759)
* [build] Introduce quickstart docker
* add checksum
* address comments
* address comments
* update to use hadoop3 to be shared by iceberg
---
docker/{ => fluss}/Dockerfile | 0
docker/{ => fluss}/docker-entrypoint.sh | 0
docker/quickstart-flink/Dockerfile | 49 ++++++
docker/quickstart-flink/README.md | 41 +++++
docker/quickstart-flink/bin/sql-client | 21 +++
docker/quickstart-flink/prepare_build.sh | 234 +++++++++++++++++++++++++++++
docker/quickstart-flink/sql/sql-client.sql | 68 +++++++++
7 files changed, 413 insertions(+)
diff --git a/docker/Dockerfile b/docker/fluss/Dockerfile
similarity index 100%
rename from docker/Dockerfile
rename to docker/fluss/Dockerfile
diff --git a/docker/docker-entrypoint.sh b/docker/fluss/docker-entrypoint.sh
similarity index 100%
rename from docker/docker-entrypoint.sh
rename to docker/fluss/docker-entrypoint.sh
diff --git a/docker/quickstart-flink/Dockerfile
b/docker/quickstart-flink/Dockerfile
new file mode 100644
index 000000000..303728229
--- /dev/null
+++ b/docker/quickstart-flink/Dockerfile
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Use Flink 1.20.0 as base image
+FROM flink:1.20.0-scala_2.12-java17
+
+# Switch to root user for installation and setup
+USER root
+
+# Install necessary packages
+RUN apt-get update && \
+ apt-get install -y tree && \
+ rm -rf /var/lib/apt/lists/*
+
+# Copy sql-client script to the container
+COPY bin/* /opt/sql-client/
+
+# Set working directory and environment
+WORKDIR /opt/sql-client
+ENV SQL_CLIENT_HOME=/opt/sql-client
+
+# Copy Fluss connector JARs and SQL files
+# Copy JARs to both sql-client lib and Flink lib directories
+COPY lib/* /opt/sql-client/lib/
+COPY sql/* /opt/sql-client/sql/
+COPY lib/* /opt/flink/lib/
+COPY opt/* /opt/flink/opt/
+
+# Modify docker-entrypoint.sh to allow Flink to run as root user
+# This is needed for the quickstart environment
+RUN sed -i 's/exec $(drop_privs_cmd)/exec/g' /docker-entrypoint.sh
+
+# Make sql-client script executable
+RUN ["chmod", "+x", "/opt/sql-client/sql-client"]
diff --git a/docker/quickstart-flink/README.md
b/docker/quickstart-flink/README.md
new file mode 100644
index 000000000..6c56cbcba
--- /dev/null
+++ b/docker/quickstart-flink/README.md
@@ -0,0 +1,41 @@
+# Fluss Quickstart Flink Docker
+
+This directory contains the Docker setup for Fluss Quickstart with Flink
integration.
+
+## Overview
+
+The Fluss Quickstart Flink Docker image provides a complete environment for
running Flink with Fluss, powered by Paimon lake storage.
+
+## Prerequisites
+
+Before building the Docker image, ensure you have:
+
+1. Check out the code version that you want to use for the Docker image. Go to
the project root directory and build Fluss using `./mvnw clean package
-DskipTests`.
+The local build will be used for the Docker image.
+2. Docker installed and running
+3. Internet access for retrieving dependencies
+
+## Build Process
+
+The build process consists of two main steps:
+
+### Step 1: Prepare Build Files
+
+First, you need to prepare the required JAR files and dependencies:
+
+```bash
+# Make the script executable
+chmod +x prepare_build.sh
+
+# Run the preparation script
+./prepare_build.sh
+```
+
+### Step 2: Build Docker Image
+
+After the preparation is complete, build the Docker image:
+
+```bash
+# Build the Docker image
+docker build -t fluss/quickstart-flink:1.20-latest .
+```
diff --git a/docker/quickstart-flink/bin/sql-client
b/docker/quickstart-flink/bin/sql-client
new file mode 100644
index 000000000..1288bfb72
--- /dev/null
+++ b/docker/quickstart-flink/bin/sql-client
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+${FLINK_HOME}/bin/sql-client.sh -i ${SQL_CLIENT_HOME}/sql/sql-client.sql
\ No newline at end of file
diff --git a/docker/quickstart-flink/prepare_build.sh
b/docker/quickstart-flink/prepare_build.sh
new file mode 100755
index 000000000..f73d4cbf2
--- /dev/null
+++ b/docker/quickstart-flink/prepare_build.sh
@@ -0,0 +1,234 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+# Configuration
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+# Logging functions
+log_info() {
+ echo "ℹ️ $1"
+}
+
+log_success() {
+ echo "✅ $1"
+}
+
+log_error() {
+ echo "❌ $1" >&2
+}
+
+# Utility function to copy JAR files with version numbers
+copy_jar() {
+ local src_pattern="$1"
+ local dest_dir="$2"
+ local description="$3"
+
+ log_info "Copying $description..."
+
+ # Find matching files
+ local matches=($src_pattern)
+ local count=${#matches[@]}
+
+ # No files matched
+ if (( count == 0 )); then
+ log_error "No matching JAR files found: $src_pattern"
+ log_error "Please build the Fluss project first: mvn clean package"
+ return 1
+ fi
+
+ # Multiple files matched
+ if (( count > 1 )); then
+ log_error "Multiple matching JAR files found:"
+ printf " %s\n" "${matches[@]}"
+ return 1
+ fi
+
+ # Exactly one file matched → copy it with original file name
+ mkdir -p "$dest_dir"
+ cp "${matches[0]}" "$dest_dir/"
+ log_success "Copied: $(basename "${matches[0]}")"
+}
+
+# Utility function to download and verify JAR
+download_jar() {
+ local url="$1"
+ local dest_file="$2"
+ local expected_hash="$3"
+ local description="$4"
+
+ log_info "Downloading $description..."
+
+ # Download the file
+ if ! wget -O "$dest_file" "$url"; then
+ log_error "Failed to download $description from $url"
+ return 1
+ fi
+
+ # Verify file size
+ if [ ! -s "$dest_file" ]; then
+ log_error "Downloaded file is empty: $dest_file"
+ return 1
+ fi
+
+ # Verify checksum if provided
+ if [ -n "$expected_hash" ]; then
+ local actual_hash=$(shasum "$dest_file" | awk '{print $1}')
+ if [ "$expected_hash" != "$actual_hash" ]; then
+ log_error "Checksum mismatch for $description"
+ log_error "Expected: $expected_hash"
+ log_error "Actual: $actual_hash"
+ return 1
+ fi
+ log_success "Checksum verified for $description"
+ else
+ log_success "Downloaded $description"
+ fi
+}
+
+# Check if required directories exist
+check_prerequisites() {
+ log_info "Checking prerequisites..."
+
+ local required_dirs=(
+ "$PROJECT_ROOT/fluss-flink/fluss-flink-1.20/target"
+ "$PROJECT_ROOT/fluss-lake/fluss-lake-paimon/target"
+ "$PROJECT_ROOT/fluss-flink/fluss-flink-tiering/target"
+ )
+
+ for dir in "${required_dirs[@]}"; do
+ if [ ! -d "$dir" ]; then
+ log_error "Required directory not found: $dir"
+ log_error "Please build the Fluss project first: mvn clean package"
+ exit 1
+ fi
+ done
+
+ log_success "All prerequisites met"
+}
+
+# Main execution
+main() {
+ log_info "Preparing JAR files for Fluss Quickstart Flink Docker..."
+ log_info "Project root: $PROJECT_ROOT"
+
+ # Check prerequisites
+ check_prerequisites
+
+ # Clean and create directories
+ log_info "Setting up directories..."
+ rm -rf lib opt
+ mkdir -p lib opt
+
+ # Copy Fluss connector JARs
+ log_info "Copying Fluss connector JARs..."
+ copy_jar
"$PROJECT_ROOT/fluss-flink/fluss-flink-1.20/target/fluss-flink-1.20-*.jar"
"./lib" "fluss-flink-1.20 connector"
+ copy_jar
"$PROJECT_ROOT/fluss-lake/fluss-lake-paimon/target/fluss-lake-paimon-*.jar"
"./lib" "fluss-lake-paimon connector"
+
+ # Download external dependencies
+ log_info "Downloading external dependencies..."
+
+ # Download flink-faker for data generation
+ download_jar \
+
"https://github.com/knaufk/flink-faker/releases/download/v0.5.3/flink-faker-0.5.3.jar"
\
+ "./lib/flink-faker-0.5.3.jar" \
+ "" \
+ "flink-faker-0.5.3"
+
+ # Download flink-shaded-hadoop-2-uber for Hadoop integration
+ download_jar \
+
"https://repo1.maven.org/maven2/io/trino/hadoop/hadoop-apache/3.3.5-2/hadoop-apache-3.3.5-2.jar"
\
+ "./lib/hadoop-apache-3.3.5-2.jar" \
+ "508255883b984483a45ca48d5af6365d4f013bb8" \
+ "hadoop-apache-3.3.5-2.jar"
+
+ # Download paimon-flink connector
+ download_jar \
+
"https://repo1.maven.org/maven2/org/apache/paimon/paimon-flink-1.20/1.2.0/paimon-flink-1.20-1.2.0.jar"
\
+ "./lib/paimon-flink-1.20-1.2.0.jar" \
+ "b9f8762c6e575f6786f1d156a18d51682ffc975c" \
+ "paimon-flink-1.20-1.2.0"
+
+ # Prepare lake tiering JAR
+ log_info "Preparing lake tiering JAR..."
+ copy_jar
"$PROJECT_ROOT/fluss-flink/fluss-flink-tiering/target/fluss-flink-tiering-*.jar"
"./opt" "fluss-flink-tiering"
+
+ # Final verification
+ verify_jars
+
+ # Show summary
+ show_summary
+}
+
+# Verify that all required JAR files are present
+verify_jars() {
+ log_info "Verifying all required JAR files are present..."
+
+ local missing_jars=()
+ local lib_jars=(
+ "fluss-flink-1.20-*.jar"
+ "fluss-lake-paimon-*.jar"
+ "flink-faker-0.5.3.jar"
+ "hadoop-apache-3.3.5-2.jar"
+ "paimon-flink-1.20-1.2.0.jar"
+ )
+
+ local opt_jars=(
+ "fluss-flink-tiering-*.jar"
+ )
+
+ # Check lib directory
+ for jar_pattern in "${lib_jars[@]}"; do
+ if ! ls ./lib/$jar_pattern >/dev/null 2>&1; then
+ missing_jars+=("lib/$jar_pattern")
+ fi
+ done
+
+ # Check opt directory
+ for jar_pattern in "${opt_jars[@]}"; do
+ if ! ls ./opt/$jar_pattern >/dev/null 2>&1; then
+ missing_jars+=("opt/$jar_pattern")
+ fi
+ done
+
+ # Report results
+ if [ ${#missing_jars[@]} -eq 0 ]; then
+ log_success "All required JAR files are present!"
+ else
+ log_error "Missing required JAR files:"
+ for jar in "${missing_jars[@]}"; do
+ log_error " - $jar"
+ done
+ exit 1
+ fi
+}
+
+# Summary function
+show_summary() {
+ log_success "JAR files preparation completed!"
+ echo ""
+ log_info "📦 Generated JAR files:"
+ echo "Lib directory:"
+ ls -la ./lib/ 2>/dev/null || echo " (empty)"
+ echo "Opt directory:"
+ ls -la ./opt/ 2>/dev/null || echo " (empty)"
+}
+
+# Run main function
+main "$@"
diff --git a/docker/quickstart-flink/sql/sql-client.sql
b/docker/quickstart-flink/sql/sql-client.sql
new file mode 100644
index 000000000..1d3c17556
--- /dev/null
+++ b/docker/quickstart-flink/sql/sql-client.sql
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TEMPORARY TABLE source_order (
+ `order_key` BIGINT,
+ `cust_key` INT,
+ `total_price` DECIMAL(15, 2),
+ `order_date` DATE,
+ `order_priority` STRING,
+ `clerk` STRING
+) WITH (
+ 'connector' = 'faker',
+ 'rows-per-second' = '10',
+ 'number-of-rows' = '10000',
+ 'fields.order_key.expression' = '#{number.numberBetween
''0'',''100000000''}',
+ 'fields.cust_key.expression' = '#{number.numberBetween ''0'',''20''}',
+ 'fields.total_price.expression' = '#{number.randomDouble
''3'',''1'',''1000''}',
+ 'fields.order_date.expression' = '#{date.past ''100'' ''DAYS''}',
+ 'fields.order_priority.expression' = '#{regexify ''(low|medium|high){1}''}',
+ 'fields.clerk.expression' = '#{regexify
''(Clerk1|Clerk2|Clerk3|Clerk4){1}''}'
+);
+
+CREATE TEMPORARY TABLE source_customer (
+ `cust_key` INT,
+ `name` STRING,
+ `phone` STRING,
+ `nation_key` INT NOT NULL,
+ `acctbal` DECIMAL(15, 2),
+ `mktsegment` STRING,
+ PRIMARY KEY (`cust_key`) NOT ENFORCED
+) WITH (
+ 'connector' = 'faker',
+ 'number-of-rows' = '200',
+ 'fields.cust_key.expression' = '#{number.numberBetween ''0'',''20''}',
+ 'fields.name.expression' = '#{funnyName.name}',
+ 'fields.nation_key.expression' = '#{number.numberBetween ''1'',''5''}',
+ 'fields.phone.expression' = '#{phoneNumber.cellPhone}',
+ 'fields.acctbal.expression' = '#{number.randomDouble ''3'',''1'',''1000''}',
+ 'fields.mktsegment.expression' = '#{regexify
''(AUTOMOBILE|BUILDING|FURNITURE|MACHINERY|HOUSEHOLD){1}''}'
+);
+
+CREATE TEMPORARY TABLE `source_nation` (
+ `nation_key` INT NOT NULL,
+ `name` STRING,
+ PRIMARY KEY (`nation_key`) NOT ENFORCED
+) WITH (
+ 'connector' = 'faker',
+ 'number-of-rows' = '100',
+ 'fields.nation_key.expression' = '#{number.numberBetween ''1'',''5''}',
+ 'fields.name.expression' = '#{regexify
''(CANADA|JORDAN|CHINA|UNITED|INDIA){1}''}'
+);
+
+SET 'table.exec.sink.not-null-enforcer'='DROP';
\ No newline at end of file