This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 3ba355e5c [#4367] feat(iceberg): add OSS support for Gravitino Iceberg
REST server (#4597)
3ba355e5c is described below
commit 3ba355e5c15ea20f64d494f31e01ef0f7c5149d5
Author: FANNG <[email protected]>
AuthorDate: Mon Sep 2 13:15:22 2024 +0800
[#4367] feat(iceberg): add OSS support for Gravitino Iceberg REST server
(#4597)
### What changes were proposed in this pull request?
add OSS support for Gravitino Iceberg REST server
### Why are the changes needed?
Fix: #4367
### Does this PR introduce _any_ user-facing change?
yes, will add a separate PR to add document
### How was this patch tested?
1. Set up an Iceberg REST server with the following configurations
```
gravitino.iceberg-rest.catalog-backend = jdbc
gravitino.iceberg-rest.jdbc-driver = org.postgresql.Driver
gravitino.iceberg-rest.uri = jdbc:postgresql://127.0.0.1:5432/postgres
gravitino.iceberg-rest.jdbc-user = postgres
gravitino.iceberg-rest.jdbc-password = xxx
gravitino.iceberg-rest.jdbc-initialize = true
# change to s3a://test/my/key/prefix for Hive catalog backend
gravitino.iceberg-rest.warehouse = oss://xxx/key/prefix
gravitino.iceberg-rest.io-impl= org.apache.iceberg.aliyun.oss.OSSFileIO
gravitino.iceberg-rest.oss-access-key-id = xx
gravitino.iceberg-rest.oss-access-key-secret = xx
gravitino.iceberg-rest.oss-endpoint = https://oss-cn-beijing.aliyuncs.com
```
2. test with Spark SQL
---
LICENSE.bin | 5 +++++
.../lakehouse/iceberg/IcebergConstants.java | 7 +++++++
.../lakehouse/iceberg/IcebergPropertiesUtils.java | 8 ++++++++
.../iceberg/IcebergCatalogPropertiesMetadata.java | 12 +++++++++++
docs/iceberg-rest-service.md | 24 +++++++++++++++++++---
docs/lakehouse-iceberg-catalog.md | 22 +++++++++++++++++++-
docs/spark-connector/spark-catalog-iceberg.md | 7 ++++++-
gradle/libs.versions.toml | 1 +
iceberg/iceberg-common/build.gradle.kts | 1 +
.../gravitino/iceberg/common/IcebergConfig.java | 21 +++++++++++++++++++
.../iceberg/common/ops/IcebergTableOps.java | 5 ++++-
11 files changed, 107 insertions(+), 6 deletions(-)
diff --git a/LICENSE.bin b/LICENSE.bin
index b56a0fdf4..27052f442 100644
--- a/LICENSE.bin
+++ b/LICENSE.bin
@@ -296,6 +296,11 @@
Apache HttpCore
Apache HttpClient
Apache Iceberg
+ Apache Iceberg Aliyun
+ Apache Iceberg api
+ Apache Iceberg AWS
+ Apache Iceberg core
+ Apache Iceberg Hive metastore
Apache Ivy
Apache Log4j 1.x Compatibility API
Apache Log4j API
diff --git
a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java
index 21d5db3ff..cb5a23369 100644
---
a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java
+++
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java
@@ -46,6 +46,13 @@ public class IcebergConstants {
public static final String GRAVITINO_S3_REGION = "s3-region";
public static final String AWS_S3_REGION = "client.region";
+ public static final String GRAVITINO_OSS_ENDPOINT = "oss-endpoint";
+ public static final String ICEBERG_OSS_ENDPOINT = "oss.endpoint";
+ public static final String GRAVITINO_OSS_ACCESS_KEY_ID = "oss-access-key-id";
+ public static final String ICEBERG_OSS_ACCESS_KEY_ID =
"client.access-key-id";
+ public static final String GRAVITINO_OSS_ACCESS_KEY_SECRET =
"oss-access-key-secret";
+ public static final String ICEBERG_OSS_ACCESS_KEY_SECRET =
"client.access-key-secret";
+
// Iceberg Table properties constants
public static final String COMMENT = "comment";
diff --git
a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java
index 3420daa97..b016dd6e7 100644
---
a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java
+++
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java
@@ -39,12 +39,20 @@ public class IcebergPropertiesUtils {
map.put(IcebergConstants.WAREHOUSE, IcebergConstants.WAREHOUSE);
map.put(IcebergConstants.CATALOG_BACKEND_NAME,
IcebergConstants.CATALOG_BACKEND_NAME);
map.put(IcebergConstants.IO_IMPL, IcebergConstants.IO_IMPL);
+ // S3
map.put(IcebergConstants.GRAVITINO_S3_ENDPOINT,
IcebergConstants.ICEBERG_S3_ENDPOINT);
map.put(IcebergConstants.GRAVITINO_S3_REGION,
IcebergConstants.AWS_S3_REGION);
map.put(IcebergConstants.GRAVITINO_S3_ACCESS_KEY_ID,
IcebergConstants.ICEBERG_S3_ACCESS_KEY_ID);
map.put(
IcebergConstants.GRAVITINO_S3_SECRET_ACCESS_KEY,
IcebergConstants.ICEBERG_S3_SECRET_ACCESS_KEY);
+ // OSS
+ map.put(IcebergConstants.GRAVITINO_OSS_ENDPOINT,
IcebergConstants.ICEBERG_OSS_ENDPOINT);
+ map.put(
+ IcebergConstants.GRAVITINO_OSS_ACCESS_KEY_ID,
IcebergConstants.ICEBERG_OSS_ACCESS_KEY_ID);
+ map.put(
+ IcebergConstants.GRAVITINO_OSS_ACCESS_KEY_SECRET,
+ IcebergConstants.ICEBERG_OSS_ACCESS_KEY_SECRET);
GRAVITINO_CONFIG_TO_ICEBERG = Collections.unmodifiableMap(map);
}
diff --git
a/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java
b/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java
index 3f8345fd0..0165d09d3 100644
---
a/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java
+++
b/catalogs/catalog-lakehouse-iceberg/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java
@@ -94,6 +94,18 @@ public class IcebergCatalogPropertiesMetadata extends
BaseCatalogPropertiesMetad
"s3 secret-access-key",
false /* immutable */,
null /* defaultValue */,
+ true /* hidden */),
+ stringOptionalPropertyEntry(
+ IcebergConstants.GRAVITINO_OSS_ACCESS_KEY_ID,
+ "OSS access-key-id",
+ false /* immutable */,
+ null /* defaultValue */,
+ true /* hidden */),
+ stringOptionalPropertyEntry(
+ IcebergConstants.GRAVITINO_OSS_ACCESS_KEY_SECRET,
+ "OSS access-key-secret",
+ false /* immutable */,
+ null /* defaultValue */,
true /* hidden */));
HashMap<String, PropertyEntry<?>> result = Maps.newHashMap();
result.putAll(Maps.uniqueIndex(propertyEntries, PropertyEntry::getName));
diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md
index 67fb96dd4..e1c8e6f1e 100644
--- a/docs/iceberg-rest-service.md
+++ b/docs/iceberg-rest-service.md
@@ -18,7 +18,10 @@ The Apache Gravitino Iceberg REST Server follows the [Apache
Iceberg REST API sp
- multi table transaction
- pagination
- Works as a catalog proxy, supporting `Hive` and `JDBC` as catalog backend.
-- Supports HDFS and S3 storage.
+- Supports multiple storage systems.
+ - HDFS
+ - S3
+ - OSS
- Supports OAuth2 and HTTPS.
- Provides a pluggable metrics store interface to store and delete Iceberg
metrics.
@@ -100,8 +103,6 @@ The detailed configuration items are as follows:
### Storage
-Gravitino Iceberg REST server supports S3 and HDFS for storage.
-
#### S3 configuration
Gravitino Iceberg REST service supports using static access-key-id and
secret-access-key to access S3 data.
@@ -120,6 +121,23 @@ For other Iceberg s3 properties not managed by Gravitino
like `s3.sse.type`, you
To configure the JDBC catalog backend, set the
`gravitino.iceberg-rest.warehouse` parameter to
`s3://{bucket_name}/${prefix_name}`. For the Hive catalog backend, set
`gravitino.iceberg-rest.warehouse` to `s3a://{bucket_name}/${prefix_name}`.
Additionally, download the [Iceberg AWS
bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws-bundle)
and place it in the classpath of Iceberg REST server.
:::
+#### OSS configuration
+
+Gravitino Iceberg REST service supports using static access-key-id and
access-key-secret to access OSS data.
+
+| Configuration item | Description
|
Default value | Required | Since Version |
+|------------------------------------------------|-------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
+| `gravitino.iceberg-rest.io-impl` | The IO implementation for
`FileIO` in Iceberg, use `org.apache.iceberg.aliyun.oss.OSSFileIO` for OSS. |
(none) | No | 0.6.0 |
+| `gravitino.iceberg-rest.oss-access-key-id` | The static access key ID
used to access OSS data. |
(none) | No | 0.7.0 |
+| `gravitino.iceberg-rest.oss-access-key-secret` | The static secret access
key used to access OSS data. |
(none) | No | 0.7.0 |
+| `gravitino.iceberg-rest.oss-endpoint` | The endpoint of Aliyun OSS
service. |
(none) | No | 0.7.0 |
+
+For other Iceberg OSS properties not managed by Gravitino, like
`client.security-token`, you can configure them directly, for example with
`gravitino.iceberg-rest.client.security-token`.
+
+:::info
+Please set the `gravitino.iceberg-rest.warehouse` parameter to
`oss://{bucket_name}/${prefix_name}`. Additionally, download the [Aliyun OSS
SDK](https://gosspublic.alicdn.com/sdks/java/aliyun_java_sdk_3.10.2.zip) and
copy `aliyun-sdk-oss-3.10.2.jar`, `hamcrest-core-1.1.jar`, `jdom2-2.0.6.jar` in
the classpath of Iceberg REST server, `iceberg-rest-server/libs` for the
auxiliary server, `libs` for the standalone server.
+:::
+
#### HDFS configuration
You should place HDFS configuration file to the classpath of the Iceberg REST
server, `iceberg-rest-server/conf` for Gravitino server package, `conf` for
standalone Gravitino Iceberg REST server package. When writing to HDFS, the
Gravitino Iceberg REST catalog service can only operate as the specified HDFS
user and doesn't support proxying to other HDFS users. See [How to access
Apache Hadoop](gravitino-server-config.md#how-to-access-apache-hadoop) for more
details.
diff --git a/docs/lakehouse-iceberg-catalog.md
b/docs/lakehouse-iceberg-catalog.md
index 96bbb1986..738f0e432 100644
--- a/docs/lakehouse-iceberg-catalog.md
+++ b/docs/lakehouse-iceberg-catalog.md
@@ -28,7 +28,10 @@ Builds with Apache Iceberg `1.5.2`. The Apache Iceberg table
format version is `
- Works as a catalog proxy, supporting `Hive`, `JDBC` and `REST` as catalog
backend.
- Supports DDL operations for Iceberg schemas and tables.
- Doesn't support snapshot or table management operations.
-- Supports S3 and HDFS storage.
+- Supports multiple storage systems.
+ - S3
+ - HDFS
+ - OSS
- Supports Kerberos or simple authentication for Iceberg catalog with Hive
backend.
### Catalog properties
@@ -83,6 +86,23 @@ For other Iceberg s3 properties not managed by Gravitino
like `s3.sse.type`, you
To configure the JDBC catalog backend, set the `warehouse` parameter to
`s3://{bucket_name}/${prefix_name}`. For the Hive catalog backend, set
`warehouse` to `s3a://{bucket_name}/${prefix_name}`. Additionally, download the
[Iceberg AWS
bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws-bundle)
and place it in the `catalogs/lakehouse-iceberg/libs/` directory.
:::
+#### OSS
+
+Gravitino Iceberg REST service supports using static access-key-id and
access-key-secret to access OSS data.
+
+| Configuration item | Description
| Default value | Required |
Since Version |
+|-------------------------|-------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
+| `io-impl` | The IO implementation for `FileIO` in Iceberg, use
`org.apache.iceberg.aliyun.oss.OSSFileIO` for OSS. | (none) | No |
0.6.0 |
+| `oss-access-key-id` | The static access key ID used to access OSS data.
| (none) | No |
0.7.0 |
+| `oss-access-key-secret` | The static secret access key used to access OSS
data. | (none) | No
| 0.7.0 |
+| `oss-endpoint` | The endpoint of Aliyun OSS service.
| (none) | No |
0.7.0 |
+
+For other Iceberg OSS properties not managed by Gravitino, like
`client.security-token`, you can configure them directly, for example with
`gravitino.bypass.client.security-token`.
+
+:::info
+Please set the `warehouse` parameter to `oss://{bucket_name}/${prefix_name}`.
Additionally, download the [Aliyun OSS
SDK](https://gosspublic.alicdn.com/sdks/java/aliyun_java_sdk_3.10.2.zip) and
copy `aliyun-sdk-oss-3.10.2.jar`, `hamcrest-core-1.1.jar`, `jdom2-2.0.6.jar` in
the `catalogs/lakehouse-iceberg/libs/` directory.
+:::
+
#### Catalog backend security
Users can use the following properties to configure the security of the
catalog backend if needed. For example, if you are using a Kerberos Hive
catalog backend, you must set `authentication.type` to `Kerberos` and provide
`authentication.kerberos.principal` and `authentication.kerberos.keytab-uri`.
diff --git a/docs/spark-connector/spark-catalog-iceberg.md
b/docs/spark-connector/spark-catalog-iceberg.md
index 1e1855d2c..b13b0ccf9 100644
--- a/docs/spark-connector/spark-catalog-iceberg.md
+++ b/docs/spark-connector/spark-catalog-iceberg.md
@@ -111,6 +111,7 @@ Gravitino spark connector will transform below property
names which are defined
| `io-impl` | `io-impl` |
The io implementation for `FileIO` in Iceberg.
| 0.6.0 |
| `s3-endpoint` | `s3.endpoint` | An
alternative endpoint of the S3 service, This could be used for S3FileIO with
any s3-compatible object storage service that has a different endpoint, or
access a private S3 endpoint in a virtual private cloud. | 0.6.0 |
| `s3-region` | `client.region` |
The region of the S3 service, like `us-west-2`.
| 0.6.0 |
+| `oss-endpoint` | `oss.endpoint` |
The endpoint of Aliyun OSS service.
| 0.7.0 |
Gravitino catalog property names with the prefix `spark.bypass.` are passed to
Spark Iceberg connector. For example, using `spark.bypass.clients` to pass the
`clients` to the Spark Iceberg connector.
@@ -122,4 +123,8 @@ Iceberg catalog property `cache-enabled` is setting to
`false` internally and no
### S3
-You need to add s3 secret to the Spark configuration using
`spark.sql.catalog.${iceberg_catalog_name}.s3.access-key-id` and
`spark.sql.catalog.${iceberg_catalog_name}.s3.secret-access-key`. Additionally,
download the [Iceberg AWS
bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws-bundle)
and place it in the classpath of Spark.
\ No newline at end of file
+You need to add s3 secret to the Spark configuration using
`spark.sql.catalog.${iceberg_catalog_name}.s3.access-key-id` and
`spark.sql.catalog.${iceberg_catalog_name}.s3.secret-access-key`. Additionally,
download the [Iceberg AWS
bundle](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws-bundle)
and place it in the classpath of Spark.
+
+### OSS
+
+You need to add OSS secret key to the Spark configuration using
`spark.sql.catalog.${iceberg_catalog_name}.client.access-key-id` and
`spark.sql.catalog.${iceberg_catalog_name}.client.access-key-secret`.
Additionally, download the [Aliyun OSS
SDK](https://gosspublic.alicdn.com/sdks/java/aliyun_java_sdk_3.10.2.zip) and
copy `aliyun-sdk-oss-3.10.2.jar`, `hamcrest-core-1.1.jar`, `jdom2-2.0.6.jar` in
the classpath of Spark.
\ No newline at end of file
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index f278e95a4..a25ddfcae 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -155,6 +155,7 @@ rocksdbjni = { group = "org.rocksdb", name = "rocksdbjni",
version.ref = "rocksd
commons-collections4 = { group = "org.apache.commons", name =
"commons-collections4", version.ref = "commons-collections4" }
commons-collections3 = { group = "commons-collections", name =
"commons-collections", version.ref = "commons-collections3" }
commons-configuration1 = { group = "commons-configuration", name =
"commons-configuration", version.ref = "commons-configuration1" }
+iceberg-aliyun = { group = "org.apache.iceberg", name = "iceberg-aliyun",
version.ref = "iceberg" }
iceberg-aws = { group = "org.apache.iceberg", name = "iceberg-aws",
version.ref = "iceberg" }
iceberg-core = { group = "org.apache.iceberg", name = "iceberg-core",
version.ref = "iceberg" }
iceberg-api = { group = "org.apache.iceberg", name = "iceberg-api",
version.ref = "iceberg" }
diff --git a/iceberg/iceberg-common/build.gradle.kts
b/iceberg/iceberg-common/build.gradle.kts
index f01b61515..fcb0a2b1f 100644
--- a/iceberg/iceberg-common/build.gradle.kts
+++ b/iceberg/iceberg-common/build.gradle.kts
@@ -34,6 +34,7 @@ dependencies {
implementation(libs.caffeine)
implementation(libs.commons.lang3)
implementation(libs.guava)
+ implementation(libs.iceberg.aliyun)
implementation(libs.iceberg.aws)
implementation(libs.iceberg.hive.metastore)
implementation(libs.hadoop2.common) {
diff --git
a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java
b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java
index e99d28508..cc8921979 100644
---
a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java
+++
b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java
@@ -136,6 +136,27 @@ public class IcebergConfig extends Config implements
OverwriteDefaultConfig {
.stringConf()
.create();
+ public static final ConfigEntry<String> OSS_ENDPOINT =
+ new ConfigBuilder(IcebergConstants.GRAVITINO_OSS_ENDPOINT)
+ .doc("The endpoint of Aliyun OSS service")
+ .version(ConfigConstants.VERSION_0_7_0)
+ .stringConf()
+ .create();
+
+ public static final ConfigEntry<String> OSS_ACCESS_KEY_ID =
+ new ConfigBuilder(IcebergConstants.GRAVITINO_OSS_ACCESS_KEY_ID)
+ .doc("The static access key ID used to access OSS data")
+ .version(ConfigConstants.VERSION_0_7_0)
+ .stringConf()
+ .create();
+
+ public static final ConfigEntry<String> OSS_ACCESS_KEY_SECRET =
+ new ConfigBuilder(IcebergConstants.GRAVITINO_OSS_ACCESS_KEY_SECRET)
+ .doc("The static secret access key used to access OSS data")
+ .version(ConfigConstants.VERSION_0_7_0)
+ .stringConf()
+ .create();
+
public static final ConfigEntry<String> ICEBERG_METRICS_STORE =
new ConfigBuilder(IcebergConstants.ICEBERG_METRICS_STORE)
.doc("The store to save Iceberg metrics")
diff --git
a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergTableOps.java
b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergTableOps.java
index 0720c0abb..aaed61bfd 100644
---
a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergTableOps.java
+++
b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/ops/IcebergTableOps.java
@@ -75,7 +75,10 @@ public class IcebergTableOps implements AutoCloseable {
IcebergConstants.AWS_S3_REGION,
IcebergConstants.ICEBERG_S3_ACCESS_KEY_ID,
IcebergConstants.ICEBERG_S3_SECRET_ACCESS_KEY,
- IcebergConstants.ICEBERG_S3_ENDPOINT);
+ IcebergConstants.ICEBERG_S3_ENDPOINT,
+ IcebergConstants.ICEBERG_OSS_ENDPOINT,
+ IcebergConstants.ICEBERG_OSS_ACCESS_KEY_ID,
+ IcebergConstants.ICEBERG_OSS_ACCESS_KEY_SECRET);
public IcebergTableOps(IcebergConfig icebergConfig) {
this.catalogBackend =