This is an automated email from the ASF dual-hosted git repository.

fanng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f96b76b79 [MINOR] build(docker): add GCS connector to 
lakehouse-iceberg and iceberg-rest-server (#7451)
7f96b76b79 is described below

commit 7f96b76b790a19bcd57f5b5ab9a086d20faf02f2
Author: Bharath Krishna <[email protected]>
AuthorDate: Thu Jun 26 02:44:19 2025 -0700

    [MINOR] build(docker): add GCS connector to lakehouse-iceberg and 
iceberg-rest-server (#7451)
    
    ### What changes were proposed in this pull request?
    
    If Hadoop FileIO is used instead of GCS FileIO to read GCS data for better
    performance, we need `gcs-connector-hadoop2-2.2.18-shaded.jar`.
    
    Otherwise, reading the Iceberg tables fails with an error like:
    ```
    Caused by: org.apache.hadoop.fs.UnsupportedFileSystemException: No 
FileSystem for scheme "gs"
            at 
org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:3239) 
~[hadoop-common-2.10.2.jar:?]
            at 
org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3259) 
~[hadoop-common-2.10.2.jar:?]
            at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:121) 
~[hadoop-common-2.10.2.jar:?]
            at 
org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3310) 
~[hadoop-common-2.10.2.jar:?]
    ```
    
    The GCS connector JAR is required to fix this.
    
    After the fix, the JAR is copied to both libs directories:
    ```
    root@gravitino-768865d9cd-r75m4:~/gravitino# ls 
catalogs/lakehouse-iceberg/libs/ | grep gcs
    gcs-connector-hadoop2-2.2.18-shaded.jar
    root@gravitino-768865d9cd-r75m4:~/gravitino# ls iceberg-rest-server/libs/ | 
grep gcs
    gcs-connector-hadoop2-2.2.18-shaded.jar
    ```
    
    ### Why are the changes needed?
    
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    local environment
---
 dev/docker/gravitino/gravitino-dependency.sh | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/dev/docker/gravitino/gravitino-dependency.sh 
b/dev/docker/gravitino/gravitino-dependency.sh
index ff6fe9164a..c842923aaf 100755
--- a/dev/docker/gravitino/gravitino-dependency.sh
+++ b/dev/docker/gravitino/gravitino-dependency.sh
@@ -25,6 +25,29 @@ gravitino_package_dir="${gravitino_dir}/packages/gravitino"
 gravitino_staging_dir="${gravitino_package_dir}/staging"
 gravitino_iceberg_rest_dir="${gravitino_package_dir}/iceberg-rest-server/libs/"
 
+# Function to download and distribute GCS connector
+download_gcs_connector() {
+  local gcs_connector_jar="gcs-connector-hadoop2-2.2.18-shaded.jar"
+  local 
gcs_connector_url="https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.18/${gcs_connector_jar}";
+  local temp_file="/tmp/${gcs_connector_jar}"
+
+  echo "INFO: Downloading GCS connector: ${gcs_connector_jar}"
+  wget -q "${gcs_connector_url}" -O "${temp_file}"
+
+  # Copy to lakehouse-iceberg catalog libs
+  mkdir -p 
"${gravitino_dir}/packages/gravitino/catalogs/lakehouse-iceberg/libs"
+  cp "${temp_file}" 
"${gravitino_dir}/packages/gravitino/catalogs/lakehouse-iceberg/libs/${gcs_connector_jar}"
+  echo "INFO: Added GCS connector to lakehouse-iceberg/libs"
+
+  # Copy to iceberg-rest-server libs
+  mkdir -p "${gravitino_dir}/packages/gravitino/iceberg-rest-server/libs"
+  cp "${temp_file}" 
"${gravitino_dir}/packages/gravitino/iceberg-rest-server/libs/${gcs_connector_jar}"
+  echo "INFO: Added GCS connector to iceberg-rest-server/libs"
+
+  # Clean up temporary file
+  rm "${temp_file}"
+}
+
 # Build the Gravitino project
 ${gravitino_home}/gradlew clean build -x test
 
@@ -84,6 +107,8 @@ cp ${gravitino_home}/bundles/gcp/build/libs/*.jar 
"${gravitino_iceberg_rest_dir}
 cp ${gravitino_home}/bundles/azure/build/libs/*.jar 
"${gravitino_iceberg_rest_dir}"
 cp ${gravitino_home}/bundles/aliyun-bundle/build/libs/*.jar 
"${gravitino_iceberg_rest_dir}"
 
+download_gcs_connector
+
 # Keeping the container running at all times
 cat <<EOF >> "${gravitino_dir}/packages/gravitino/bin/gravitino.sh"
 

Reply via email to