This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new e984b1e Metadata integration tests (#57)
e984b1e is described below
commit e984b1e7c01fb5b17c8a1635cec82ab833d22e7d
Author: JanKaul <[email protected]>
AuthorDate: Fri Sep 15 09:56:11 2023 +0200
Metadata integration tests (#57)
* add test for valid metadata v2
* add test for minimal valid table metadata v2
* test valid v1 table metadata
* Test for invalid schema id
* improve failing test
* missing sort order test
* test for missing partition spec
* test missing partition id
* test missing schemas
* test unsupported version
* fix changes
* improve error message
* fix clippy warnings
---
crates/iceberg/src/spec/snapshot.rs | 2 +-
crates/iceberg/src/spec/table_metadata.rs | 439 +++++++++++++++++++--
.../TableMetadataUnsupportedVersion.json | 36 ++
.../table_metadata/TableMetadataV1Valid.json | 42 ++
.../TableMetadataV2CurrentSchemaNotFound.json | 88 +++++
.../TableMetadataV2MissingLastPartitionId.json | 73 ++++
.../TableMetadataV2MissingPartitionSpecs.json | 67 ++++
.../TableMetadataV2MissingSchemas.json | 71 ++++
.../TableMetadataV2MissingSortOrder.json | 54 +++
.../table_metadata/TableMetadataV2Valid.json | 122 ++++++
.../TableMetadataV2ValidMinimal.json | 71 ++++
11 files changed, 1042 insertions(+), 23 deletions(-)
diff --git a/crates/iceberg/src/spec/snapshot.rs
b/crates/iceberg/src/spec/snapshot.rs
index 9a80288..f38a605 100644
--- a/crates/iceberg/src/spec/snapshot.rs
+++ b/crates/iceberg/src/spec/snapshot.rs
@@ -77,7 +77,7 @@ pub struct Snapshot {
/// A string map that summarizes the snapshot changes, including operation.
summary: Summary,
/// ID of the table’s current schema when the snapshot was created.
- #[builder(setter(strip_option))]
+ #[builder(setter(strip_option), default = "None")]
schema_id: Option<i64>,
}
diff --git a/crates/iceberg/src/spec/table_metadata.rs
b/crates/iceberg/src/spec/table_metadata.rs
index ebf7cca..f40b63e 100644
--- a/crates/iceberg/src/spec/table_metadata.rs
+++ b/crates/iceberg/src/spec/table_metadata.rs
@@ -39,6 +39,7 @@ use _serde::TableMetadataEnum;
static MAIN_BRANCH: &str = "main";
static DEFAULT_SPEC_ID: i32 = 0;
+static DEFAULT_SORT_ORDER_ID: i64 = 0;
#[derive(Debug, PartialEq, Serialize, Deserialize, Eq, Clone)]
#[serde(try_from = "TableMetadataEnum", into = "TableMetadataEnum")]
@@ -202,6 +203,7 @@ pub(super) mod _serde {
/// [TableMetadataV1] and [TableMetadataV2] are internal struct that are
only used for serialization and deserialization.
use std::{collections::HashMap, sync::Arc};
+ use itertools::Itertools;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
@@ -211,11 +213,12 @@ pub(super) mod _serde {
snapshot::_serde::{SnapshotV1, SnapshotV2},
PartitionField, PartitionSpec, Schema, SnapshotReference,
SnapshotRetention, SortOrder,
},
- Error,
+ Error, ErrorKind,
};
use super::{
- FormatVersion, MetadataLog, SnapshotLog, TableMetadata,
DEFAULT_SPEC_ID, MAIN_BRANCH,
+ FormatVersion, MetadataLog, SnapshotLog, TableMetadata,
DEFAULT_SORT_ORDER_ID,
+ DEFAULT_SPEC_ID, MAIN_BRANCH,
};
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
@@ -288,8 +291,8 @@ pub(super) mod _serde {
pub snapshot_log: Option<Vec<SnapshotLog>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata_log: Option<Vec<MetadataLog>>,
- pub sort_orders: Vec<SortOrder>,
- pub default_sort_order_id: i64,
+ pub sort_orders: Option<Vec<SortOrder>>,
+ pub default_sort_order_id: Option<i64>,
}
/// Helper to serialize and deserialize the format version.
@@ -346,6 +349,13 @@ pub(super) mod _serde {
} else {
value.current_snapshot_id
};
+ let schemas = HashMap::from_iter(
+ value
+ .schemas
+ .into_iter()
+ .map(|schema| Ok((schema.schema_id,
Arc::new(schema.try_into()?))))
+ .collect::<Result<Vec<_>, Error>>()?,
+ );
Ok(TableMetadata {
format_version: FormatVersion::V2,
table_uuid: value.table_uuid,
@@ -353,14 +363,18 @@ pub(super) mod _serde {
last_sequence_number: value.last_sequence_number,
last_updated_ms: value.last_updated_ms,
last_column_id: value.last_column_id,
- schemas: HashMap::from_iter(
- value
- .schemas
- .into_iter()
- .map(|schema| Ok((schema.schema_id,
Arc::new(schema.try_into()?))))
- .collect::<Result<Vec<_>, Error>>()?,
- ),
- current_schema_id: value.current_schema_id,
+ current_schema_id: if
schemas.keys().contains(&value.current_schema_id) {
+ Ok(value.current_schema_id)
+ } else {
+ Err(self::Error::new(
+ ErrorKind::DataInvalid,
+ format!(
+ "No schema exists with the current schema id {}.",
+ value.current_schema_id
+ ),
+ ))
+ }?,
+ schemas,
partition_specs: HashMap::from_iter(
value.partition_specs.into_iter().map(|x| (x.spec_id, x)),
),
@@ -479,10 +493,13 @@ pub(super) mod _serde {
.transpose()?,
snapshot_log: value.snapshot_log.unwrap_or_default(),
metadata_log: value.metadata_log.unwrap_or_default(),
- sort_orders: HashMap::from_iter(
- value.sort_orders.into_iter().map(|x| (x.order_id, x)),
- ),
- default_sort_order_id: value.default_sort_order_id,
+ sort_orders: match value.sort_orders {
+ Some(sort_orders) => {
+ HashMap::from_iter(sort_orders.into_iter().map(|x|
(x.order_id, x)))
+ }
+ None => HashMap::new(),
+ },
+ default_sort_order_id:
value.default_sort_order_id.unwrap_or(DEFAULT_SORT_ORDER_ID),
refs: HashMap::from_iter(vec![(
MAIN_BRANCH.to_string(),
SnapshotReference {
@@ -613,8 +630,8 @@ pub(super) mod _serde {
} else {
Some(v.metadata_log)
},
- sort_orders: v.sort_orders.into_values().collect(),
- default_sort_order_id: v.default_sort_order_id,
+ sort_orders: Some(v.sort_orders.into_values().collect()),
+ default_sort_order_id: Some(v.default_sort_order_id),
}
}
}
@@ -653,7 +670,7 @@ pub struct SnapshotLog {
#[cfg(test)]
mod tests {
- use std::{collections::HashMap, sync::Arc};
+ use std::{collections::HashMap, fs, sync::Arc};
use anyhow::Result;
use uuid::Uuid;
@@ -661,9 +678,9 @@ mod tests {
use pretty_assertions::assert_eq;
use crate::spec::{
- table_metadata::TableMetadata, ManifestList, NestedField, Operation,
PartitionField,
- PartitionSpec, PrimitiveType, Schema, Snapshot, SnapshotReference,
SnapshotRetention,
- SortOrder, Summary, Transform, Type,
+ table_metadata::TableMetadata, ManifestList, NestedField, NullOrder,
Operation,
+ PartitionField, PartitionSpec, PrimitiveType, Schema, Snapshot,
SnapshotReference,
+ SnapshotRetention, SortDirection, SortField, SortOrder, Summary,
Transform, Type,
};
use super::{FormatVersion, MetadataLog, SnapshotLog};
@@ -972,4 +989,382 @@ mod tests {
assert!(serde_json::from_str::<TableMetadata>(data).is_err());
Ok(())
}
+
+ #[test]
+ fn test_table_metadata_v2_file_valid() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataV2Valid.json").unwrap();
+
+ let schema1 = Schema::builder()
+ .with_schema_id(0)
+ .with_fields(vec![Arc::new(NestedField::required(
+ 1,
+ "x",
+ Type::Primitive(PrimitiveType::Long),
+ ))])
+ .build()
+ .unwrap();
+
+ let schema2 = Schema::builder()
+ .with_schema_id(1)
+ .with_fields(vec![
+ Arc::new(NestedField::required(
+ 1,
+ "x",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ Arc::new(
+ NestedField::required(2, "y",
Type::Primitive(PrimitiveType::Long))
+ .with_doc("comment"),
+ ),
+ Arc::new(NestedField::required(
+ 3,
+ "z",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ ])
+ .with_identifier_field_ids(vec![1, 2])
+ .build()
+ .unwrap();
+
+ let partition_spec = PartitionSpec::builder()
+ .with_spec_id(0)
+ .with_partition_field(PartitionField {
+ name: "x".to_string(),
+ transform: Transform::Identity,
+ source_id: 1,
+ field_id: 1000,
+ })
+ .build()
+ .unwrap();
+
+ let sort_order = SortOrder::builder()
+ .with_order_id(3)
+ .with_sort_field(SortField {
+ source_id: 2,
+ transform: Transform::Identity,
+ direction: SortDirection::Ascending,
+ null_order: NullOrder::First,
+ })
+ .with_sort_field(SortField {
+ source_id: 3,
+ transform: Transform::Bucket(4),
+ direction: SortDirection::Descending,
+ null_order: NullOrder::Last,
+ })
+ .build()
+ .unwrap();
+
+ let snapshot1 = Snapshot::builder()
+ .with_snapshot_id(3051729675574597004)
+ .with_timestamp_ms(1515100955770)
+ .with_sequence_number(0)
+ .with_manifest_list(ManifestList::ManifestListFile(
+ "s3://a/b/1.avro".to_string(),
+ ))
+ .with_summary(Summary {
+ operation: Operation::Append,
+ other: HashMap::new(),
+ })
+ .build()
+ .unwrap();
+
+ let snapshot2 = Snapshot::builder()
+ .with_snapshot_id(3055729675574597004)
+ .with_parent_snapshot_id(Some(3051729675574597004))
+ .with_timestamp_ms(1555100955770)
+ .with_sequence_number(1)
+ .with_schema_id(1)
+ .with_manifest_list(ManifestList::ManifestListFile(
+ "s3://a/b/2.avro".to_string(),
+ ))
+ .with_summary(Summary {
+ operation: Operation::Append,
+ other: HashMap::new(),
+ })
+ .build()
+ .unwrap();
+
+ let expected = TableMetadata {
+ format_version: FormatVersion::V2,
+ table_uuid:
Uuid::parse_str("9c12d441-03fe-4693-9a96-a0705ddf69c1").unwrap(),
+ location: "s3://bucket/test/location".to_string(),
+ last_updated_ms: 1602638573590,
+ last_column_id: 3,
+ schemas: HashMap::from_iter(vec![(0, Arc::new(schema1)), (1,
Arc::new(schema2))]),
+ current_schema_id: 1,
+ partition_specs: HashMap::from_iter(vec![(0, partition_spec)]),
+ default_spec_id: 0,
+ last_partition_id: 1000,
+ default_sort_order_id: 3,
+ sort_orders: HashMap::from_iter(vec![(3, sort_order)]),
+ snapshots: Some(HashMap::from_iter(vec![
+ (3051729675574597004, Arc::new(snapshot1)),
+ (3055729675574597004, Arc::new(snapshot2)),
+ ])),
+ current_snapshot_id: Some(3055729675574597004),
+ last_sequence_number: 34,
+ properties: HashMap::new(),
+ snapshot_log: vec![
+ SnapshotLog {
+ snapshot_id: 3051729675574597004,
+ timestamp_ms: 1515100955770,
+ },
+ SnapshotLog {
+ snapshot_id: 3055729675574597004,
+ timestamp_ms: 1555100955770,
+ },
+ ],
+ metadata_log: Vec::new(),
+ refs: HashMap::from_iter(vec![(
+ "main".to_string(),
+ SnapshotReference {
+ snapshot_id: 3055729675574597004,
+ retention: SnapshotRetention::Branch {
+ min_snapshots_to_keep: None,
+ max_snapshot_age_ms: None,
+ max_ref_age_ms: None,
+ },
+ },
+ )]),
+ };
+
+ check_table_metadata_serde(&metadata, expected);
+ }
+
+ #[test]
+ fn test_table_metadata_v2_file_valid_minimal() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataV2ValidMinimal.json").unwrap();
+
+ let schema = Schema::builder()
+ .with_schema_id(0)
+ .with_fields(vec![
+ Arc::new(NestedField::required(
+ 1,
+ "x",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ Arc::new(
+ NestedField::required(2, "y",
Type::Primitive(PrimitiveType::Long))
+ .with_doc("comment"),
+ ),
+ Arc::new(NestedField::required(
+ 3,
+ "z",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ ])
+ .build()
+ .unwrap();
+
+ let partition_spec = PartitionSpec::builder()
+ .with_spec_id(0)
+ .with_partition_field(PartitionField {
+ name: "x".to_string(),
+ transform: Transform::Identity,
+ source_id: 1,
+ field_id: 1000,
+ })
+ .build()
+ .unwrap();
+
+ let sort_order = SortOrder::builder()
+ .with_order_id(3)
+ .with_sort_field(SortField {
+ source_id: 2,
+ transform: Transform::Identity,
+ direction: SortDirection::Ascending,
+ null_order: NullOrder::First,
+ })
+ .with_sort_field(SortField {
+ source_id: 3,
+ transform: Transform::Bucket(4),
+ direction: SortDirection::Descending,
+ null_order: NullOrder::Last,
+ })
+ .build()
+ .unwrap();
+
+ let expected = TableMetadata {
+ format_version: FormatVersion::V2,
+ table_uuid:
Uuid::parse_str("9c12d441-03fe-4693-9a96-a0705ddf69c1").unwrap(),
+ location: "s3://bucket/test/location".to_string(),
+ last_updated_ms: 1602638573590,
+ last_column_id: 3,
+ schemas: HashMap::from_iter(vec![(0, Arc::new(schema))]),
+ current_schema_id: 0,
+ partition_specs: HashMap::from_iter(vec![(0, partition_spec)]),
+ default_spec_id: 0,
+ last_partition_id: 1000,
+ default_sort_order_id: 3,
+ sort_orders: HashMap::from_iter(vec![(3, sort_order)]),
+ snapshots: None,
+ current_snapshot_id: None,
+ last_sequence_number: 34,
+ properties: HashMap::new(),
+ snapshot_log: vec![],
+ metadata_log: Vec::new(),
+ refs: HashMap::new(),
+ };
+
+ check_table_metadata_serde(&metadata, expected);
+ }
+
+ #[test]
+ fn test_table_metadata_v1_file_valid() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataV1Valid.json").unwrap();
+
+ let schema = Schema::builder()
+ .with_schema_id(0)
+ .with_fields(vec![
+ Arc::new(NestedField::required(
+ 1,
+ "x",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ Arc::new(
+ NestedField::required(2, "y",
Type::Primitive(PrimitiveType::Long))
+ .with_doc("comment"),
+ ),
+ Arc::new(NestedField::required(
+ 3,
+ "z",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ ])
+ .build()
+ .unwrap();
+
+ let partition_spec = PartitionSpec::builder()
+ .with_spec_id(0)
+ .with_partition_field(PartitionField {
+ name: "x".to_string(),
+ transform: Transform::Identity,
+ source_id: 1,
+ field_id: 1000,
+ })
+ .build()
+ .unwrap();
+
+ let expected = TableMetadata {
+ format_version: FormatVersion::V1,
+ table_uuid:
Uuid::parse_str("d20125c8-7284-442c-9aea-15fee620737c").unwrap(),
+ location: "s3://bucket/test/location".to_string(),
+ last_updated_ms: 1602638573874,
+ last_column_id: 3,
+ schemas: HashMap::from_iter(vec![(0, Arc::new(schema))]),
+ current_schema_id: 0,
+ partition_specs: HashMap::from_iter(vec![(0, partition_spec)]),
+ default_spec_id: 0,
+ last_partition_id: 0,
+ default_sort_order_id: 0,
+ sort_orders: HashMap::new(),
+ snapshots: Some(HashMap::new()),
+ current_snapshot_id: None,
+ last_sequence_number: 0,
+ properties: HashMap::new(),
+ snapshot_log: vec![],
+ metadata_log: Vec::new(),
+ refs: HashMap::from_iter(vec![(
+ "main".to_string(),
+ SnapshotReference {
+ snapshot_id: -1,
+ retention: SnapshotRetention::Branch {
+ min_snapshots_to_keep: None,
+ max_snapshot_age_ms: None,
+ max_ref_age_ms: None,
+ },
+ },
+ )]),
+ };
+
+ check_table_metadata_serde(&metadata, expected);
+ }
+
+ #[test]
+ fn test_table_metadata_v2_schema_not_found() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json")
+ .unwrap();
+
+ let desered: Result<TableMetadata, serde_json::Error> =
serde_json::from_str(&metadata);
+
+ assert_eq!(
+ desered.unwrap_err().to_string(),
+ "DataInvalid => No schema exists with the current schema id 2."
+ )
+ }
+
+ #[test]
+ fn test_table_metadata_v2_missing_sort_order() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataV2MissingSortOrder.json")
+ .unwrap();
+
+ let desered: Result<TableMetadata, serde_json::Error> =
serde_json::from_str(&metadata);
+
+ assert_eq!(
+ desered.unwrap_err().to_string(),
+ "data did not match any variant of untagged enum TableMetadataEnum"
+ )
+ }
+
+ #[test]
+ fn test_table_metadata_v2_missing_partition_specs() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json")
+ .unwrap();
+
+ let desered: Result<TableMetadata, serde_json::Error> =
serde_json::from_str(&metadata);
+
+ assert_eq!(
+ desered.unwrap_err().to_string(),
+ "data did not match any variant of untagged enum TableMetadataEnum"
+ )
+ }
+
+ #[test]
+ fn test_table_metadata_v2_missing_last_partition_id() {
+ let metadata = fs::read_to_string(
+
"testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json",
+ )
+ .unwrap();
+
+ let desered: Result<TableMetadata, serde_json::Error> =
serde_json::from_str(&metadata);
+
+ assert_eq!(
+ desered.unwrap_err().to_string(),
+ "data did not match any variant of untagged enum TableMetadataEnum"
+ )
+ }
+
+ #[test]
+ fn test_table_metadata_v2_missing_schemas() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataV2MissingSchemas.json")
+ .unwrap();
+
+ let desered: Result<TableMetadata, serde_json::Error> =
serde_json::from_str(&metadata);
+
+ assert_eq!(
+ desered.unwrap_err().to_string(),
+ "data did not match any variant of untagged enum TableMetadataEnum"
+ )
+ }
+
+ #[test]
+ fn test_table_metadata_v2_unsupported_version() {
+ let metadata =
+
fs::read_to_string("testdata/table_metadata/TableMetadataUnsupportedVersion.json")
+ .unwrap();
+
+ let desered: Result<TableMetadata, serde_json::Error> =
serde_json::from_str(&metadata);
+
+ assert_eq!(
+ desered.unwrap_err().to_string(),
+ "data did not match any variant of untagged enum TableMetadataEnum"
+ )
+ }
}
diff --git
a/crates/iceberg/testdata/table_metadata/TableMetadataUnsupportedVersion.json
b/crates/iceberg/testdata/table_metadata/TableMetadataUnsupportedVersion.json
new file mode 100644
index 0000000..0633a71
--- /dev/null
+++
b/crates/iceberg/testdata/table_metadata/TableMetadataUnsupportedVersion.json
@@ -0,0 +1,36 @@
+{
+ "format-version": 3,
+ "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
+ "location": "s3://bucket/test/location",
+ "last-updated-ms": 1602638573874,
+ "last-sequence-number": 0,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ },
+ "partition-spec": [],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": []
+}
\ No newline at end of file
diff --git a/crates/iceberg/testdata/table_metadata/TableMetadataV1Valid.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV1Valid.json
new file mode 100644
index 0000000..0b55d51
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV1Valid.json
@@ -0,0 +1,42 @@
+{
+ "format-version": 1,
+ "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
+ "location": "s3://bucket/test/location",
+ "last-updated-ms": 1602638573874,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ },
+ "partition-spec": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": []
+}
\ No newline at end of file
diff --git
a/crates/iceberg/testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json
new file mode 100644
index 0000000..d010785
--- /dev/null
+++
b/crates/iceberg/testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json
@@ -0,0 +1,88 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 2,
+ "schemas": [
+ {
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ },
+ {
+ "type": "struct",
+ "schema-id": 1,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "last-partition-id": 1000,
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": []
+}
\ No newline at end of file
diff --git
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json
new file mode 100644
index 0000000..31c2b4c
--- /dev/null
+++
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json
@@ -0,0 +1,73 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 0,
+ "schemas": [{
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": []
+}
\ No newline at end of file
diff --git
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json
new file mode 100644
index 0000000..3ab0a7a
--- /dev/null
+++
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json
@@ -0,0 +1,67 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 0,
+ "schemas": [{
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }],
+ "partition-spec": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ],
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": []
+}
\ No newline at end of file
diff --git
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSchemas.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSchemas.json
new file mode 100644
index 0000000..3754354
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSchemas.json
@@ -0,0 +1,71 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ },
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": []
+}
\ No newline at end of file
diff --git
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSortOrder.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSortOrder.json
new file mode 100644
index 0000000..fbbcf41
--- /dev/null
+++
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSortOrder.json
@@ -0,0 +1,54 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 0,
+ "schemas": [{
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "last-partition-id": 1000,
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": []
+}
\ No newline at end of file
diff --git a/crates/iceberg/testdata/table_metadata/TableMetadataV2Valid.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV2Valid.json
new file mode 100644
index 0000000..0dc89de
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV2Valid.json
@@ -0,0 +1,122 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 1,
+ "schemas": [
+ {
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ },
+ {
+ "type": "struct",
+ "schema-id": 1,
+ "identifier-field-ids": [
+ 1,
+ 2
+ ],
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "last-partition-id": 1000,
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": 3055729675574597004,
+ "snapshots": [
+ {
+ "snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1515100955770,
+ "sequence-number": 0,
+ "summary": {
+ "operation": "append"
+ },
+ "manifest-list": "s3://a/b/1.avro"
+ },
+ {
+ "snapshot-id": 3055729675574597004,
+ "parent-snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1555100955770,
+ "sequence-number": 1,
+ "summary": {
+ "operation": "append"
+ },
+ "manifest-list": "s3://a/b/2.avro",
+ "schema-id": 1
+ }
+ ],
+ "snapshot-log": [
+ {
+ "snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1515100955770
+ },
+ {
+ "snapshot-id": 3055729675574597004,
+ "timestamp-ms": 1555100955770
+ }
+ ],
+ "metadata-log": []
+}
\ No newline at end of file
diff --git
a/crates/iceberg/testdata/table_metadata/TableMetadataV2ValidMinimal.json
b/crates/iceberg/testdata/table_metadata/TableMetadataV2ValidMinimal.json
new file mode 100644
index 0000000..529b10d
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV2ValidMinimal.json
@@ -0,0 +1,71 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "last-partition-id": 1000,
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file