Xuanwo commented on code in PR #54: URL: https://github.com/apache/iceberg-rust/pull/54#discussion_r1312554574
########## crates/iceberg/src/catalog.rs: ########## @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Catalog API for Apache Iceberg + +use crate::spec::{PartitionSpec, Schema, SortOrder, TableMetadata}; +use crate::Result; +use async_trait::async_trait; +use std::collections::HashMap; + +/// The catalog API for Iceberg Rust. +#[async_trait] +pub trait Catalog { + /// Get the catalog configuration for specified warehouse. + async fn get_config(&self, warehouse: Option<&str>) -> Result<CatalogConfig>; + + /// List namespaces from table. + async fn list_namespaces(&self, parent: Option<NamespaceIdent>) -> Result<Vec<NamespaceIdent>>; + + /// Create a new namespace inside the catalog. + async fn create_namespace( + &self, + namespace: NamespaceIdent, + properties: HashMap<String, String>, + ) -> Result<Namespace>; + + /// Get a namespace information from the catalog. + async fn get_namespace(&self, namespace: NamespaceIdent) -> Result<Namespace>; + + /// Update a namespace inside the catalog. + /// + /// # Behavior + /// + /// The properties must be the full set of namespace. 
+ async fn update_namespace( + &self, + namespace: NamespaceIdent, + properties: HashMap<String, String>, + ) -> Result<()>; + + /// Drop a namespace from the catalog. + async fn drop_namespace(&self, namespace: NamespaceIdent) -> Result<()>; + + /// List tables from namespace. + async fn list_tables(&self, namespace: NamespaceIdent) -> Result<Vec<TableIdent>>; + + /// Create a new table inside the namespace. + async fn create_table( + &self, + namespace: NamespaceIdent, + creation: TableCreation, + ) -> Result<Table>; + + /// Load table from the catalog. + async fn load_table(&self, table: TableIdent) -> Result<Table>; + + /// Drop a table from the catalog. + async fn drop_table(&self, table: TableIdent) -> Result<()>; + + /// Check if a table exists in the catalog. + async fn stat_table(&self, table: TableIdent) -> Result<bool>; + + /// Rename a table in the catalog. + async fn rename_table(&self, src: TableIdent, dest: TableIdent) -> Result<()>; + + /// Commit a table to the catalog. + async fn commit_table(&self, table: TableIdent, commit: TableCommit) -> Result<Table>; + + /// Commit multiple tables to the catalog as an atomic operation. + async fn commit_tables(&self, tables: Vec<(TableIdent, TableCommit)>) -> Result<()>; +} + +/// The config the catalog. +pub struct CatalogConfig { + defaults: HashMap<String, String>, + overrides: HashMap<String, String>, +} + +/// NamespaceIdent represents the identifier of a namespace in the catalog. +pub struct NamespaceIdent(Vec<String>); + +/// Namespace represents a namespace in the catalog. +pub struct Namespace { + name: NamespaceIdent, + properties: HashMap<String, String>, +} + +/// TableIdent represents the identifier of a table in the catalog. +pub struct TableIdent { + namespace: NamespaceIdent, + name: String, +} + +/// Table represents a table in the catalog. +pub struct Table { Review Comment: > I mean whether it will provide method like getting a task writer or it just represent the metadata. 
Personally, I believe that they should be two separate structs. --- I like delta-rs's style, such as [`WriteBuilder`](https://github.com/delta-io/delta-rs/blob/main/rust/src/operations/write.rs#L109), which handles operations separately instead of maintaining a global state. I think we can discuss this topic later. ########## crates/iceberg/src/catalog.rs: ########## @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Catalog API for Apache Iceberg + +use crate::spec::{PartitionSpec, Schema, SortOrder, TableMetadata}; +use crate::Result; +use async_trait::async_trait; +use std::collections::HashMap; + +/// The catalog API for Iceberg Rust. +#[async_trait] +pub trait Catalog { + /// Get the catalog configuration for specified warehouse. + async fn get_config(&self, warehouse: Option<&str>) -> Result<CatalogConfig>; + + /// List namespaces from table. + async fn list_namespaces(&self, parent: Option<NamespaceIdent>) -> Result<Vec<NamespaceIdent>>; + + /// Create a new namespace inside the catalog. + async fn create_namespace( + &self, + namespace: NamespaceIdent, + properties: HashMap<String, String>, + ) -> Result<Namespace>; + + /// Get a namespace information from the catalog. 
+ async fn get_namespace(&self, namespace: NamespaceIdent) -> Result<Namespace>; + + /// Update a namespace inside the catalog. + /// + /// # Behavior + /// + /// The properties must be the full set of namespace. + async fn update_namespace( + &self, + namespace: NamespaceIdent, + properties: HashMap<String, String>, + ) -> Result<()>; + + /// Drop a namespace from the catalog. + async fn drop_namespace(&self, namespace: NamespaceIdent) -> Result<()>; + + /// List tables from namespace. + async fn list_tables(&self, namespace: NamespaceIdent) -> Result<Vec<TableIdent>>; + + /// Create a new table inside the namespace. + async fn create_table( + &self, + namespace: NamespaceIdent, + creation: TableCreation, + ) -> Result<Table>; + + /// Load table from the catalog. + async fn load_table(&self, table: TableIdent) -> Result<Table>; + + /// Drop a table from the catalog. + async fn drop_table(&self, table: TableIdent) -> Result<()>; + + /// Check if a table exists in the catalog. + async fn stat_table(&self, table: TableIdent) -> Result<bool>; + + /// Rename a table in the catalog. + async fn rename_table(&self, src: TableIdent, dest: TableIdent) -> Result<()>; + + /// Commit a table to the catalog. + async fn commit_table(&self, table: TableIdent, commit: TableCommit) -> Result<Table>; + + /// Commit multiple tables to the catalog as an atomic operation. + async fn commit_tables(&self, tables: Vec<(TableIdent, TableCommit)>) -> Result<()>; +} + +/// The config the catalog. +pub struct CatalogConfig { + defaults: HashMap<String, String>, + overrides: HashMap<String, String>, +} + +/// NamespaceIdent represents the identifier of a namespace in the catalog. +pub struct NamespaceIdent(Vec<String>); + +/// Namespace represents a namespace in the catalog. +pub struct Namespace { + name: NamespaceIdent, + properties: HashMap<String, String>, +} + +/// TableIdent represents the identifier of a table in the catalog. 
+pub struct TableIdent { + namespace: NamespaceIdent, + name: String, +} + +/// Table represents a table in the catalog. +pub struct Table { Review Comment: > I mean whether it will provide method like getting a task writer or it just represent the metadata. Personally, I believe that they should be two separate structs. --- I like delta-rs's style, such as [`WriteBuilder`](https://github.com/delta-io/delta-rs/blob/main/rust/src/operations/write.rs#L109), which handles operations separately instead of maintaining a global state. I think we can discuss this topic later. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
