This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new fd9aa71 Glue Catalog: Basic Setup + Test Infra (1/3) (#294)
fd9aa71 is described below
commit fd9aa71193b68a48f7b082c49166203c0c8ff413
Author: Marvin Lanhenke <[email protected]>
AuthorDate: Mon Mar 25 08:02:57 2024 +0100
Glue Catalog: Basic Setup + Test Infra (1/3) (#294)
* extend dependency DIRS
* create dependencies for glue
* basic setup
* rename test
* add utils/get_sdk_config
* add tests
* add list_namespace
* fix: clippy
* fix: unused
* fix: workspace
* fix: name
* use creds in test-setup
* fix: empty dependencies.rust.tsv
* fix: rename endpoint_url
* remove deps.tsv
* add hms deps.tsv
* fix deps.tsv
* fix: deps.tsv
---
Cargo.toml | 2 +
crates/catalog/glue/Cargo.toml | 43 ++++++
crates/catalog/glue/README.md | 27 ++++
crates/catalog/glue/src/catalog.rs | 164 +++++++++++++++++++++
crates/catalog/glue/src/error.rs | 35 +++++
crates/catalog/glue/src/lib.rs | 28 ++++
crates/catalog/glue/src/utils.rs | 124 ++++++++++++++++
.../glue/testdata/glue_catalog/docker-compose.yaml | 24 +++
crates/catalog/glue/tests/glue_catalog_test.rs | 93 ++++++++++++
scripts/dependencies.py | 13 +-
10 files changed, 545 insertions(+), 8 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index c1cdcdf..ad886c3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,6 +42,8 @@ arrow-array = { version = "51" }
arrow-schema = { version = "51" }
async-stream = "0.3.5"
async-trait = "0.1"
+aws-config = "1.1.8"
+aws-sdk-glue = "1.21.0"
bimap = "0.6"
bitvec = "1.0.1"
bytes = "1.5"
diff --git a/crates/catalog/glue/Cargo.toml b/crates/catalog/glue/Cargo.toml
new file mode 100644
index 0000000..daa9587
--- /dev/null
+++ b/crates/catalog/glue/Cargo.toml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "iceberg-catalog-glue"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+rust-version = { workspace = true }
+
+categories = ["database"]
+description = "Apache Iceberg Glue Catalog Support"
+repository = { workspace = true }
+license = { workspace = true }
+keywords = ["iceberg", "glue", "catalog"]
+
+[dependencies]
+anyhow = { workspace = true }
+async-trait = { workspace = true }
+aws-config = { workspace = true }
+aws-sdk-glue = { workspace = true }
+iceberg = { workspace = true }
+log = { workspace = true }
+typed-builder = { workspace = true }
+
+[dev-dependencies]
+iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
+port_scanner = { workspace = true }
+tokio = { workspace = true }
diff --git a/crates/catalog/glue/README.md b/crates/catalog/glue/README.md
new file mode 100644
index 0000000..fb7f6bf
--- /dev/null
+++ b/crates/catalog/glue/README.md
@@ -0,0 +1,27 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+-->
+
+# Apache Iceberg Glue Catalog Official Native Rust Implementation
+
+[crates.io](https://crates.io/crates/iceberg-catalog-glue)
+[docs.rs](https://docs.rs/iceberg/latest/iceberg-catalog-glue/)
+
+This crate contains the official Native Rust implementation of Apache Iceberg
Glue Catalog.
+
+See the [API documentation](https://docs.rs/iceberg-catalog-glue/latest) for
examples and the full API.
diff --git a/crates/catalog/glue/src/catalog.rs
b/crates/catalog/glue/src/catalog.rs
new file mode 100644
index 0000000..7c5e73a
--- /dev/null
+++ b/crates/catalog/glue/src/catalog.rs
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg Glue Catalog implementation.
+
+use async_trait::async_trait;
+use iceberg::table::Table;
+use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCommit,
TableCreation, TableIdent};
+use std::{collections::HashMap, fmt::Debug};
+
+use typed_builder::TypedBuilder;
+
+use crate::error::from_aws_error;
+use crate::utils::create_sdk_config;
+
+#[derive(Debug, TypedBuilder)]
+/// Glue Catalog configuration
+///
+/// Constructed through the builder generated by `TypedBuilder`; both fields
+/// carry `#[builder(default)]`, so either may be omitted when building.
+pub struct GlueCatalogConfig {
+ // Optional endpoint URL. `GlueCatalog::new` forwards it to
+ // `create_sdk_config` as the endpoint override.
+ #[builder(default, setter(strip_option))]
+ uri: Option<String>,
+ // Free-form key/value properties. `GlueCatalog::new` forwards them to
+ // `create_sdk_config`, which reads credentials, profile and region from them.
+ #[builder(default)]
+ props: HashMap<String, String>,
+}
+
+// Private newtype wrapper around the AWS SDK Glue client.
+struct GlueClient(aws_sdk_glue::Client);
+
+/// Glue Catalog
+///
+/// Holds the configuration it was created from together with the AWS Glue
+/// client built from that configuration.
+pub struct GlueCatalog {
+ // Configuration supplied to `new`; printed by the `Debug` implementation.
+ config: GlueCatalogConfig,
+ // Wrapped AWS SDK Glue client used to send requests.
+ client: GlueClient,
+}
+
+impl Debug for GlueCatalog {
+    /// Formats the catalog showing only its `config`; the client is left out
+    /// and the output is marked as non-exhaustive.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut repr = f.debug_struct("GlueCatalog");
+        repr.field("config", &self.config);
+        repr.finish_non_exhaustive()
+    }
+}
+
+impl GlueCatalog {
+    /// Create a new glue catalog
+    ///
+    /// Builds an AWS SDK configuration from the properties and optional
+    /// endpoint URI in `config`, then creates the Glue client from it.
+    pub async fn new(config: GlueCatalogConfig) -> Self {
+        let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await;
+
+        Self {
+            client: GlueClient(aws_sdk_glue::Client::new(&sdk_config)),
+            config,
+        }
+    }
+}
+
+#[async_trait]
+impl Catalog for GlueCatalog {
+    /// Lists the Glue databases as top-level namespaces.
+    ///
+    /// When `parent` is given an empty list is returned without contacting
+    /// Glue (presumably because Glue databases are not nested — confirm).
+    /// Otherwise `GetDatabases` is called repeatedly, following the returned
+    /// `next_token`, until no further token is reported.
+    ///
+    /// # Errors
+    ///
+    /// Any AWS SDK failure is converted with `from_aws_error` and returned.
+    async fn list_namespaces(
+        &self,
+        parent: Option<&NamespaceIdent>,
+    ) -> Result<Vec<NamespaceIdent>> {
+        if parent.is_some() {
+            return Ok(vec![]);
+        }
+
+        let mut database_list: Vec<NamespaceIdent> = Vec::new();
+        let mut next_token: Option<String> = None;
+
+        loop {
+            let request = match &next_token {
+                Some(token) => self.client.0.get_databases().next_token(token),
+                None => self.client.0.get_databases(),
+            };
+            let page = request.send().await.map_err(from_aws_error)?;
+
+            // Extend straight from the iterator; no temporary Vec per page.
+            database_list.extend(
+                page.database_list()
+                    .iter()
+                    .map(|db| NamespaceIdent::new(db.name().to_string())),
+            );
+
+            next_token = page.next_token().map(ToOwned::to_owned);
+            if next_token.is_none() {
+                break;
+            }
+        }
+
+        Ok(database_list)
+    }
+
+    // The remaining trait methods are not implemented yet; each one panics
+    // through `todo!()` when called.
+
+    async fn create_namespace(
+        &self,
+        _namespace: &NamespaceIdent,
+        _properties: HashMap<String, String>,
+    ) -> Result<Namespace> {
+        todo!()
+    }
+
+    async fn get_namespace(&self, _namespace: &NamespaceIdent) -> Result<Namespace> {
+        todo!()
+    }
+
+    async fn namespace_exists(&self, _namespace: &NamespaceIdent) -> Result<bool> {
+        todo!()
+    }
+
+    async fn update_namespace(
+        &self,
+        _namespace: &NamespaceIdent,
+        _properties: HashMap<String, String>,
+    ) -> Result<()> {
+        todo!()
+    }
+
+    async fn drop_namespace(&self, _namespace: &NamespaceIdent) -> Result<()> {
+        todo!()
+    }
+
+    async fn list_tables(&self, _namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
+        todo!()
+    }
+
+    async fn create_table(
+        &self,
+        _namespace: &NamespaceIdent,
+        _creation: TableCreation,
+    ) -> Result<Table> {
+        todo!()
+    }
+
+    async fn load_table(&self, _table: &TableIdent) -> Result<Table> {
+        todo!()
+    }
+
+    async fn drop_table(&self, _table: &TableIdent) -> Result<()> {
+        todo!()
+    }
+
+    async fn table_exists(&self, _table: &TableIdent) -> Result<bool> {
+        todo!()
+    }
+
+    async fn rename_table(&self, _src: &TableIdent, _dest: &TableIdent) -> Result<()> {
+        todo!()
+    }
+
+    async fn update_table(&self, _commit: TableCommit) -> Result<Table> {
+        todo!()
+    }
+}
diff --git a/crates/catalog/glue/src/error.rs b/crates/catalog/glue/src/error.rs
new file mode 100644
index 0000000..c9a2559
--- /dev/null
+++ b/crates/catalog/glue/src/error.rs
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg Glue Catalog implementation.
+
+use anyhow::anyhow;
+use std::fmt::Debug;
+
+use iceberg::{Error, ErrorKind};
+
+/// Format AWS SDK error into iceberg error
+///
+/// The returned error has kind `ErrorKind::Unexpected`; the `Debug` rendering
+/// of the original SDK error is attached as its source.
+pub fn from_aws_error<T>(error: aws_sdk_glue::error::SdkError<T>) -> Error
+where
+    T: Debug,
+{
+    Error::new(
+        ErrorKind::Unexpected,
+        "Operation failed for hitting aws sdk error".to_string(),
+    )
+    .with_source(anyhow!("aws sdk error: {:?}", error))
+}
diff --git a/crates/catalog/glue/src/lib.rs b/crates/catalog/glue/src/lib.rs
new file mode 100644
index 0000000..b274cf7
--- /dev/null
+++ b/crates/catalog/glue/src/lib.rs
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg Glue Catalog implementation.
+
+#![deny(missing_docs)]
+
+mod catalog;
+mod error;
+mod utils;
+pub use catalog::*;
+pub use utils::{
+ AWS_ACCESS_KEY_ID, AWS_PROFILE_NAME, AWS_REGION_NAME,
AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN,
+};
diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs
new file mode 100644
index 0000000..e824da8
--- /dev/null
+++ b/crates/catalog/glue/src/utils.rs
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg Glue Catalog implementation.
+
+use std::collections::HashMap;
+
+use aws_config::{BehaviorVersion, Region, SdkConfig};
+use aws_sdk_glue::config::Credentials;
+
+const _GLUE_ID: &str = "glue.id";
+const _GLUE_SKIP_ARCHIVE: &str = "glue.skip-archive";
+const _GLUE_SKIP_ARCHIVE_DEFAULT: bool = true;
+/// Property aws profile name
+pub const AWS_PROFILE_NAME: &str = "profile_name";
+/// Property aws region
+pub const AWS_REGION_NAME: &str = "region_name";
+/// Property aws access key
+pub const AWS_ACCESS_KEY_ID: &str = "aws_access_key_id";
+/// Property aws secret access key
+pub const AWS_SECRET_ACCESS_KEY: &str = "aws_secret_access_key";
+/// Property aws session token
+pub const AWS_SESSION_TOKEN: &str = "aws_session_token";
+
+/// Builds the AWS `SdkConfig` used by the Glue client.
+///
+/// Starts from the SDK defaults and then applies, when present:
+/// * `endpoint_url` as the endpoint override,
+/// * static credentials, only if both `AWS_ACCESS_KEY_ID` and
+///   `AWS_SECRET_ACCESS_KEY` are in `properties` (`AWS_SESSION_TOKEN` is
+///   optional),
+/// * `AWS_PROFILE_NAME` as the profile name,
+/// * `AWS_REGION_NAME` as the region.
+pub(crate) async fn create_sdk_config(
+    properties: &HashMap<String, String>,
+    endpoint_url: Option<&String>,
+) -> SdkConfig {
+    let mut loader = aws_config::defaults(BehaviorVersion::latest());
+
+    if let Some(url) = endpoint_url {
+        loader = loader.endpoint_url(url);
+    }
+
+    // No properties means there is nothing further to override.
+    if properties.is_empty() {
+        return loader.load().await;
+    }
+
+    let access_key = properties.get(AWS_ACCESS_KEY_ID);
+    let secret_key = properties.get(AWS_SECRET_ACCESS_KEY);
+    if let (Some(key_id), Some(secret)) = (access_key, secret_key) {
+        let token = properties.get(AWS_SESSION_TOKEN).cloned();
+        let static_credentials = Credentials::new(key_id, secret, token, None, "properties");
+        loader = loader.credentials_provider(static_credentials);
+    }
+
+    if let Some(profile) = properties.get(AWS_PROFILE_NAME) {
+        loader = loader.profile_name(profile);
+    }
+
+    if let Some(region) = properties.get(AWS_REGION_NAME) {
+        loader = loader.region(Region::new(region.clone()));
+    }
+
+    loader.load().await
+}
+
+#[cfg(test)]
+mod tests {
+ use aws_sdk_glue::config::ProvideCredentials;
+
+ use super::*;
+
+ // With no properties, the endpoint passed to `create_sdk_config` must be
+ // the one reported by the resulting `SdkConfig`.
+ #[tokio::test]
+ async fn test_config_with_custom_endpoint() {
+ let properties = HashMap::new();
+ let endpoint_url = "http://custom_url:5000";
+
+ let sdk_config = create_sdk_config(&properties,
Some(&endpoint_url.to_string())).await;
+
+ let result = sdk_config.endpoint_url().unwrap();
+
+ assert_eq!(result, endpoint_url);
+ }
+
+ // With profile, region and full static credentials in the properties, the
+ // region and the credentials resolved from the `SdkConfig` must match them.
+ #[tokio::test]
+ async fn test_config_with_properties() {
+ let properties = HashMap::from([
+ (AWS_PROFILE_NAME.to_string(), "my_profile".to_string()),
+ (AWS_REGION_NAME.to_string(), "us-east-1".to_string()),
+ (AWS_ACCESS_KEY_ID.to_string(), "my-access-id".to_string()),
+ (
+ AWS_SECRET_ACCESS_KEY.to_string(),
+ "my-secret-key".to_string(),
+ ),
+ (AWS_SESSION_TOKEN.to_string(), "my-token".to_string()),
+ ]);
+
+ let sdk_config = create_sdk_config(&properties, None).await;
+
+ let region = sdk_config.region().unwrap().as_ref();
+ // Resolve the credentials through the configured provider.
+ let credentials = sdk_config
+ .credentials_provider()
+ .unwrap()
+ .provide_credentials()
+ .await
+ .unwrap();
+
+ assert_eq!("us-east-1", region);
+ assert_eq!("my-access-id", credentials.access_key_id());
+ assert_eq!("my-secret-key", credentials.secret_access_key());
+ assert_eq!("my-token", credentials.session_token().unwrap());
+ }
+}
diff --git a/crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml
b/crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml
new file mode 100644
index 0000000..c24d2d7
--- /dev/null
+++ b/crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+version: '3.8'
+
+services:
+ moto:
+ image: motoserver/moto:5.0.3
+ expose:
+ - 5000
diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs
b/crates/catalog/glue/tests/glue_catalog_test.rs
new file mode 100644
index 0000000..24a9aff
--- /dev/null
+++ b/crates/catalog/glue/tests/glue_catalog_test.rs
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Integration tests for glue catalog.
+
+use std::collections::HashMap;
+
+use iceberg::{Catalog, Result};
+use iceberg_catalog_glue::{
+ GlueCatalog, GlueCatalogConfig, AWS_ACCESS_KEY_ID, AWS_REGION_NAME,
AWS_SECRET_ACCESS_KEY,
+};
+use iceberg_test_utils::docker::DockerCompose;
+use iceberg_test_utils::{normalize_test_name, set_up};
+use port_scanner::scan_port_addr;
+use tokio::time::sleep;
+
+const GLUE_CATALOG_PORT: u16 = 5000;
+
+// Everything a test needs: the docker-compose handle and a catalog that
+// points at the container started from it.
+#[derive(Debug)]
+struct TestFixture {
+ // Never read; presumably held so the containers stay up for the lifetime
+ // of the fixture — confirm against `DockerCompose`'s drop behaviour.
+ _docker_compose: DockerCompose,
+ // Catalog configured against the `moto` container.
+ glue_catalog: GlueCatalog,
+}
+
+/// Starts the `glue_catalog` docker-compose project for the test named
+/// `func`, waits until the `moto` container accepts connections on
+/// `GLUE_CATALOG_PORT`, and returns a fixture whose catalog points at it.
+async fn set_test_fixture(func: &str) -> TestFixture {
+    set_up();
+
+    let project_name = normalize_test_name(format!("{}_{func}", module_path!()));
+    let compose_dir = format!("{}/testdata/glue_catalog", env!("CARGO_MANIFEST_DIR"));
+    let docker_compose = DockerCompose::new(project_name, compose_dir);
+
+    docker_compose.run();
+
+    let moto_ip = docker_compose.get_container_ip("moto");
+    let moto_addr = format!("{}:{}", moto_ip, GLUE_CATALOG_PORT);
+
+    // Poll once per second until the port is reachable.
+    while !scan_port_addr(&moto_addr) {
+        log::info!("Waiting for 1s glue catalog to ready...");
+        sleep(std::time::Duration::from_millis(1000)).await;
+    }
+
+    let props: HashMap<String, String> = [
+        (AWS_ACCESS_KEY_ID, "my_access_id"),
+        (AWS_SECRET_ACCESS_KEY, "my_secret_key"),
+        (AWS_REGION_NAME, "us-east-1"),
+    ]
+    .into_iter()
+    .map(|(key, value)| (key.to_string(), value.to_string()))
+    .collect();
+
+    let config = GlueCatalogConfig::builder()
+        .uri(format!("http://{}:{}", moto_ip, GLUE_CATALOG_PORT))
+        .props(props)
+        .build();
+
+    TestFixture {
+        glue_catalog: GlueCatalog::new(config).await,
+        _docker_compose: docker_compose,
+    }
+}
+
+/// Listing top-level namespaces on the fresh fixture is expected to return
+/// an empty list.
+#[tokio::test]
+async fn test_list_namespace() -> Result<()> {
+    let fixture = set_test_fixture("test_list_namespace").await;
+
+    let namespaces = fixture.glue_catalog.list_namespaces(None).await?;
+    let no_namespaces = Vec::new();
+
+    assert_eq!(namespaces, no_namespaces);
+
+    Ok(())
+}
diff --git a/scripts/dependencies.py b/scripts/dependencies.py
index 13bb445..33d3de0 100644
--- a/scripts/dependencies.py
+++ b/scripts/dependencies.py
@@ -20,11 +20,9 @@ from argparse import ArgumentParser,
ArgumentDefaultsHelpFormatter, REMAINDER
import subprocess
import os
-
DIRS = [
- "crates/iceberg",
- "crates/catalog/hms",
- "crates/catalog/rest"
+ "crates/iceberg", "crates/catalog/hms", "crates/catalog/rest",
+ "crates/catalog/glue"
]
@@ -54,10 +52,9 @@ if __name__ == "__main__":
parser.set_defaults(func=parser.print_help)
subparsers = parser.add_subparsers()
- parser_check = subparsers.add_parser(
- 'check',
- description="Check dependencies",
- help="Check dependencies")
+ parser_check = subparsers.add_parser('check',
+ description="Check dependencies",
+ help="Check dependencies")
parser_check.set_defaults(func=check_deps)
parser_generate = subparsers.add_parser(