This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 13281d3 feat: Add Catalog API (#54)
13281d3 is described below
commit 13281d3cfb0e7281df4c0208b492fe1b44e8a7dd
Author: Xuanwo <[email protected]>
AuthorDate: Thu Sep 21 16:29:26 2023 +0800
feat: Add Catalog API (#54)
* feat: Add Catalog API
Signed-off-by: Xuanwo <[email protected]>
* remove get config
Signed-off-by: Xuanwo <[email protected]>
* Fix naming
Signed-off-by: Xuanwo <[email protected]>
* Use ref instead
Signed-off-by: Xuanwo <[email protected]>
* Move table out
Signed-off-by: Xuanwo <[email protected]>
* Fix typo
Signed-off-by: Xuanwo <[email protected]>
* Update crates/iceberg/src/spec/schema.rs
Co-authored-by: Renjie Liu <[email protected]>
* Make partition_spec optional
Signed-off-by: Xuanwo <[email protected]>
* Update crates/iceberg/src/table.rs
Co-authored-by: Fokko Driesprong <[email protected]>
* Fix sort
Signed-off-by: Xuanwo <[email protected]>
* Remove config
Signed-off-by: Xuanwo <[email protected]>
* Make clippy happy
Signed-off-by: Xuanwo <[email protected]>
---------
Signed-off-by: Xuanwo <[email protected]>
Co-authored-by: Renjie Liu <[email protected]>
Co-authored-by: Fokko Driesprong <[email protected]>
---
crates/iceberg/Cargo.toml | 1 +
crates/iceberg/src/catalog.rs | 149 ++++++++++++++++++++++++++++++++
crates/iceberg/src/lib.rs | 7 ++
crates/iceberg/src/spec/schema.rs | 2 +-
crates/iceberg/src/{lib.rs => table.rs} | 20 ++---
5 files changed, 165 insertions(+), 14 deletions(-)
diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml
index 007c5c2..4a4839d 100644
--- a/crates/iceberg/Cargo.toml
+++ b/crates/iceberg/Cargo.toml
@@ -29,6 +29,7 @@ keywords = ["iceberg"]
[dependencies]
anyhow = "1.0.72"
apache-avro = "0.15"
+async-trait = "0.1"
bimap = "0.6"
bitvec = "1.0.1"
chrono = "0.4"
diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs
new file mode 100644
index 0000000..f525539
--- /dev/null
+++ b/crates/iceberg/src/catalog.rs
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Catalog API for Apache Iceberg
+
+use crate::spec::{PartitionSpec, Schema, SortOrder};
+use crate::table::Table;
+use crate::Result;
+use async_trait::async_trait;
+use std::collections::HashMap;
+
+/// The catalog API for Iceberg Rust.
+#[async_trait]
+pub trait Catalog {
+ /// List namespaces from table.
+ async fn list_namespaces(&self, parent: Option<&NamespaceIdent>)
+ -> Result<Vec<NamespaceIdent>>;
+
+ /// Create a new namespace inside the catalog.
+ async fn create_namespace(
+ &self,
+ namespace: &NamespaceIdent,
+ properties: HashMap<String, String>,
+ ) -> Result<Namespace>;
+
+ /// Get a namespace information from the catalog.
+ async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result<Namespace>;
+
+ /// Update a namespace inside the catalog.
+ ///
+ /// # Behavior
+ ///
+ /// The properties must be the full set of namespace.
+ async fn update_namespace(
+ &self,
+ namespace: &NamespaceIdent,
+ properties: HashMap<String, String>,
+ ) -> Result<()>;
+
+ /// Drop a namespace from the catalog.
+ async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()>;
+
+ /// List tables from namespace.
+ async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>>;
+
+ /// Create a new table inside the namespace.
+ async fn create_table(
+ &self,
+ namespace: &NamespaceIdent,
+ creation: TableCreation,
+ ) -> Result<Table>;
+
+ /// Load table from the catalog.
+ async fn load_table(&self, table: &TableIdent) -> Result<Table>;
+
+ /// Drop a table from the catalog.
+ async fn drop_table(&self, table: &TableIdent) -> Result<()>;
+
+ /// Check if a table exists in the catalog.
+ async fn stat_table(&self, table: &TableIdent) -> Result<bool>;
+
+ /// Rename a table in the catalog.
+ async fn rename_table(&self, src: &TableIdent, dest: &TableIdent) -> Result<()>;
+
+ /// Update a table to the catalog.
+ async fn update_table(&self, table: &TableIdent, commit: TableCommit) -> Result<Table>;
+
+ /// Update multiple tables to the catalog as an atomic operation.
+ async fn update_tables(&self, tables: &[(TableIdent, TableCommit)]) -> Result<()>;
+}
+
+/// NamespaceIdent represents the identifier of a namespace in the catalog.
+pub struct NamespaceIdent(Vec<String>);
+
+/// Namespace represents a namespace in the catalog.
+pub struct Namespace {
+ name: NamespaceIdent,
+ properties: HashMap<String, String>,
+}
+
+/// TableIdent represents the identifier of a table in the catalog.
+pub struct TableIdent {
+ namespace: NamespaceIdent,
+ name: String,
+}
+
+/// TableCreation represents the creation of a table in the catalog.
+pub struct TableCreation {
+ name: String,
+ location: String,
+ schema: Schema,
+ partition_spec: Option<PartitionSpec>,
+ sort_order: SortOrder,
+ properties: HashMap<String, String>,
+}
+
+/// TableCommit represents the commit of a table in the catalog.
+pub struct TableCommit {
+ ident: TableIdent,
+ requirements: Vec<TableRequirement>,
+ updates: Vec<TableUpdate>,
+}
+
+/// TableRequirement represents a requirement for a table in the catalog.
+pub enum TableRequirement {
+ /// The table must not already exist; used for create transactions
+ NotExist,
+ /// The table UUID must match the requirement.
+ UuidMatch(String),
+ /// The table branch or tag identified by the requirement's `reference` must
+ /// reference the requirement's `snapshot-id`.
+ RefSnapshotIdMatch {
+ /// The reference of the table to assert.
+ reference: String,
+ /// The snapshot id of the table to assert.
+ /// If the id is `None`, the ref must not already exist.
+ snapshot_id: Option<i64>,
+ },
+ /// The table's last assigned column id must match the requirement.
+ LastAssignedFieldIdMatch(i64),
+ /// The table's current schema id must match the requirement.
+ CurrentSchemaIdMatch(i64),
+ /// The table's last assigned partition id must match the
+ /// requirement.
+ LastAssignedPartitionIdMatch(i64),
+ /// The table's default spec id must match the requirement.
+ DefaultSpecIdMatch(i64),
+ /// The table's default sort order id must match the requirement.
+ DefaultSortOrderIdMatch(i64),
+}
+
+/// TableUpdate represents an update to a table in the catalog.
+///
+/// TODO: we should fill with UpgradeFormatVersionUpdate, AddSchemaUpdate and so on.
+pub enum TableUpdate {}
diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs
index 93413d7..573b58e 100644
--- a/crates/iceberg/src/lib.rs
+++ b/crates/iceberg/src/lib.rs
@@ -27,6 +27,13 @@ pub use error::Error;
pub use error::ErrorKind;
pub use error::Result;
+/// There is no implementation for this trait, allow dead code for now, should
+/// be removed after we have one.
+#[allow(dead_code)]
+pub mod catalog;
+#[allow(dead_code)]
+pub mod table;
+
mod avro;
pub mod io;
pub mod spec;
diff --git a/crates/iceberg/src/spec/schema.rs b/crates/iceberg/src/spec/schema.rs
index 2e9ead2..cef2dcc 100644
--- a/crates/iceberg/src/spec/schema.rs
+++ b/crates/iceberg/src/spec/schema.rs
@@ -60,7 +60,7 @@ pub struct SchemaBuilder {
}
impl SchemaBuilder {
- /// Add fields to schem builder.
+ /// Add fields to schema builder.
pub fn with_fields(mut self, fields: impl IntoIterator<Item = NestedFieldRef>) -> Self {
self.fields.extend(fields);
self
diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/table.rs
similarity index 77%
copy from crates/iceberg/src/lib.rs
copy to crates/iceberg/src/table.rs
index 93413d7..ebe6753 100644
--- a/crates/iceberg/src/lib.rs
+++ b/crates/iceberg/src/table.rs
@@ -15,18 +15,12 @@
// specific language governing permissions and limitations
// under the License.
-//! Native Rust implementation of Apache Iceberg
+//! Table API for Apache Iceberg
-#![deny(missing_docs)]
+use crate::spec::TableMetadata;
-#[macro_use]
-extern crate derive_builder;
-
-mod error;
-pub use error::Error;
-pub use error::ErrorKind;
-pub use error::Result;
-
-mod avro;
-pub mod io;
-pub mod spec;
+/// Table represents a table in the catalog.
+pub struct Table {
+ metadata_location: String,
+ metadata: TableMetadata,
+}