This is an automated email from the ASF dual-hosted git repository.
fanng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 7f96b76b79 [MINOR] build(docker): add GCS connector to
lakehouse-iceberg and iceberg-rest-server (#7451)
7f96b76b79 is described below
commit 7f96b76b790a19bcd57f5b5ab9a086d20faf02f2
Author: Bharath Krishna <[email protected]>
AuthorDate: Thu Jun 26 02:44:19 2025 -0700
[MINOR] build(docker): add GCS connector to lakehouse-iceberg and
iceberg-rest-server (#7451)
### What changes were proposed in this pull request?
When Hadoop FileIO (rather than GCS FileIO) is used to read GCS data for better
performance, `gcs-connector-hadoop2-2.2.18-shaded.jar` is required.
Otherwise, reading the Iceberg tables fails with an error like:
```
Caused by: org.apache.hadoop.fs.UnsupportedFileSystemException: No
FileSystem for scheme "gs"
at
org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:3239)
~[hadoop-common-2.10.2.jar:?]
at
org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3259)
~[hadoop-common-2.10.2.jar:?]
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:121)
~[hadoop-common-2.10.2.jar:?]
at
org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3310)
~[hadoop-common-2.10.2.jar:?]
```
The GCS connector JAR is required to fix this.
After the fix, the JAR is copied to both libs directories:
```
root@gravitino-768865d9cd-r75m4:~/gravitino# ls
catalogs/lakehouse-iceberg/libs/ | grep gcs
gcs-connector-hadoop2-2.2.18-shaded.jar
root@gravitino-768865d9cd-r75m4:~/gravitino# ls iceberg-rest-server/libs/ |
grep gcs
gcs-connector-hadoop2-2.2.18-shaded.jar
```
### Why are the changes needed?
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
local environment
---
dev/docker/gravitino/gravitino-dependency.sh | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/dev/docker/gravitino/gravitino-dependency.sh
b/dev/docker/gravitino/gravitino-dependency.sh
index ff6fe9164a..c842923aaf 100755
--- a/dev/docker/gravitino/gravitino-dependency.sh
+++ b/dev/docker/gravitino/gravitino-dependency.sh
@@ -25,6 +25,29 @@ gravitino_package_dir="${gravitino_dir}/packages/gravitino"
gravitino_staging_dir="${gravitino_package_dir}/staging"
gravitino_iceberg_rest_dir="${gravitino_package_dir}/iceberg-rest-server/libs/"
+# Function to download and distribute GCS connector
+download_gcs_connector() {
+ local gcs_connector_jar="gcs-connector-hadoop2-2.2.18-shaded.jar"
+ local
gcs_connector_url="https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.18/${gcs_connector_jar}"
+ local temp_file="/tmp/${gcs_connector_jar}"
+
+ echo "INFO: Downloading GCS connector: ${gcs_connector_jar}"
+ wget -q "${gcs_connector_url}" -O "${temp_file}"
+
+ # Copy to lakehouse-iceberg catalog libs
+ mkdir -p
"${gravitino_dir}/packages/gravitino/catalogs/lakehouse-iceberg/libs"
+ cp "${temp_file}"
"${gravitino_dir}/packages/gravitino/catalogs/lakehouse-iceberg/libs/${gcs_connector_jar}"
+ echo "INFO: Added GCS connector to lakehouse-iceberg/libs"
+
+ # Copy to iceberg-rest-server libs
+ mkdir -p "${gravitino_dir}/packages/gravitino/iceberg-rest-server/libs"
+ cp "${temp_file}"
"${gravitino_dir}/packages/gravitino/iceberg-rest-server/libs/${gcs_connector_jar}"
+ echo "INFO: Added GCS connector to iceberg-rest-server/libs"
+
+ # Clean up temporary file
+ rm "${temp_file}"
+}
+
# Build the Gravitino project
${gravitino_home}/gradlew clean build -x test
@@ -84,6 +107,8 @@ cp ${gravitino_home}/bundles/gcp/build/libs/*.jar
"${gravitino_iceberg_rest_dir}
cp ${gravitino_home}/bundles/azure/build/libs/*.jar
"${gravitino_iceberg_rest_dir}"
cp ${gravitino_home}/bundles/aliyun-bundle/build/libs/*.jar
"${gravitino_iceberg_rest_dir}"
+download_gcs_connector
+
# Keeping the container running at all times
cat <<EOF >> "${gravitino_dir}/packages/gravitino/bin/gravitino.sh"