This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new e984b1e  Metadata integration tests (#57)
e984b1e is described below

commit e984b1e7c01fb5b17c8a1635cec82ab833d22e7d
Author: JanKaul <[email protected]>
AuthorDate: Fri Sep 15 09:56:11 2023 +0200

    Metadata integration tests (#57)
    
    * add test for valid metadata v2
    
    * add test for minimal valid table metadata v2
    
    * test valid v1 table metadata
    
    * Test for invalid schema id
    
    * improve failing test
    
    * missing sort order test
    
    * test for missing partition spec
    
    * test missing partition id
    
    * test missing schemas
    
    * test unsupported version
    
    * fix changes
    
    * improve error message
    
    * fix clippy warnings
---
 crates/iceberg/src/spec/snapshot.rs                |   2 +-
 crates/iceberg/src/spec/table_metadata.rs          | 439 +++++++++++++++++++--
 .../TableMetadataUnsupportedVersion.json           |  36 ++
 .../table_metadata/TableMetadataV1Valid.json       |  42 ++
 .../TableMetadataV2CurrentSchemaNotFound.json      |  88 +++++
 .../TableMetadataV2MissingLastPartitionId.json     |  73 ++++
 .../TableMetadataV2MissingPartitionSpecs.json      |  67 ++++
 .../TableMetadataV2MissingSchemas.json             |  71 ++++
 .../TableMetadataV2MissingSortOrder.json           |  54 +++
 .../table_metadata/TableMetadataV2Valid.json       | 122 ++++++
 .../TableMetadataV2ValidMinimal.json               |  71 ++++
 11 files changed, 1042 insertions(+), 23 deletions(-)

diff --git a/crates/iceberg/src/spec/snapshot.rs 
b/crates/iceberg/src/spec/snapshot.rs
index 9a80288..f38a605 100644
--- a/crates/iceberg/src/spec/snapshot.rs
+++ b/crates/iceberg/src/spec/snapshot.rs
@@ -77,7 +77,7 @@ pub struct Snapshot {
     /// A string map that summarizes the snapshot changes, including operation.
     summary: Summary,
     /// ID of the table’s current schema when the snapshot was created.
-    #[builder(setter(strip_option))]
+    #[builder(setter(strip_option), default = "None")]
     schema_id: Option<i64>,
 }
 
diff --git a/crates/iceberg/src/spec/table_metadata.rs 
b/crates/iceberg/src/spec/table_metadata.rs
index ebf7cca..f40b63e 100644
--- a/crates/iceberg/src/spec/table_metadata.rs
+++ b/crates/iceberg/src/spec/table_metadata.rs
@@ -39,6 +39,7 @@ use _serde::TableMetadataEnum;
 
 static MAIN_BRANCH: &str = "main";
 static DEFAULT_SPEC_ID: i32 = 0;
+static DEFAULT_SORT_ORDER_ID: i64 = 0;
 
 #[derive(Debug, PartialEq, Serialize, Deserialize, Eq, Clone)]
 #[serde(try_from = "TableMetadataEnum", into = "TableMetadataEnum")]
@@ -202,6 +203,7 @@ pub(super) mod _serde {
     /// [TableMetadataV1] and [TableMetadataV2] are internal struct that are 
only used for serialization and deserialization.
     use std::{collections::HashMap, sync::Arc};
 
+    use itertools::Itertools;
     use serde::{Deserialize, Serialize};
     use uuid::Uuid;
 
@@ -211,11 +213,12 @@ pub(super) mod _serde {
             snapshot::_serde::{SnapshotV1, SnapshotV2},
             PartitionField, PartitionSpec, Schema, SnapshotReference, 
SnapshotRetention, SortOrder,
         },
-        Error,
+        Error, ErrorKind,
     };
 
     use super::{
-        FormatVersion, MetadataLog, SnapshotLog, TableMetadata, 
DEFAULT_SPEC_ID, MAIN_BRANCH,
+        FormatVersion, MetadataLog, SnapshotLog, TableMetadata, 
DEFAULT_SORT_ORDER_ID,
+        DEFAULT_SPEC_ID, MAIN_BRANCH,
     };
 
     #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
@@ -288,8 +291,8 @@ pub(super) mod _serde {
         pub snapshot_log: Option<Vec<SnapshotLog>>,
         #[serde(skip_serializing_if = "Option::is_none")]
         pub metadata_log: Option<Vec<MetadataLog>>,
-        pub sort_orders: Vec<SortOrder>,
-        pub default_sort_order_id: i64,
+        pub sort_orders: Option<Vec<SortOrder>>,
+        pub default_sort_order_id: Option<i64>,
     }
 
     /// Helper to serialize and deserialize the format version.
@@ -346,6 +349,13 @@ pub(super) mod _serde {
             } else {
                 value.current_snapshot_id
             };
+            let schemas = HashMap::from_iter(
+                value
+                    .schemas
+                    .into_iter()
+                    .map(|schema| Ok((schema.schema_id, 
Arc::new(schema.try_into()?))))
+                    .collect::<Result<Vec<_>, Error>>()?,
+            );
             Ok(TableMetadata {
                 format_version: FormatVersion::V2,
                 table_uuid: value.table_uuid,
@@ -353,14 +363,18 @@ pub(super) mod _serde {
                 last_sequence_number: value.last_sequence_number,
                 last_updated_ms: value.last_updated_ms,
                 last_column_id: value.last_column_id,
-                schemas: HashMap::from_iter(
-                    value
-                        .schemas
-                        .into_iter()
-                        .map(|schema| Ok((schema.schema_id, 
Arc::new(schema.try_into()?))))
-                        .collect::<Result<Vec<_>, Error>>()?,
-                ),
-                current_schema_id: value.current_schema_id,
+                current_schema_id: if 
schemas.keys().contains(&value.current_schema_id) {
+                    Ok(value.current_schema_id)
+                } else {
+                    Err(self::Error::new(
+                        ErrorKind::DataInvalid,
+                        format!(
+                            "No schema exists with the current schema id {}.",
+                            value.current_schema_id
+                        ),
+                    ))
+                }?,
+                schemas,
                 partition_specs: HashMap::from_iter(
                     value.partition_specs.into_iter().map(|x| (x.spec_id, x)),
                 ),
@@ -479,10 +493,13 @@ pub(super) mod _serde {
                     .transpose()?,
                 snapshot_log: value.snapshot_log.unwrap_or_default(),
                 metadata_log: value.metadata_log.unwrap_or_default(),
-                sort_orders: HashMap::from_iter(
-                    value.sort_orders.into_iter().map(|x| (x.order_id, x)),
-                ),
-                default_sort_order_id: value.default_sort_order_id,
+                sort_orders: match value.sort_orders {
+                    Some(sort_orders) => {
+                        HashMap::from_iter(sort_orders.into_iter().map(|x| 
(x.order_id, x)))
+                    }
+                    None => HashMap::new(),
+                },
+                default_sort_order_id: 
value.default_sort_order_id.unwrap_or(DEFAULT_SORT_ORDER_ID),
                 refs: HashMap::from_iter(vec![(
                     MAIN_BRANCH.to_string(),
                     SnapshotReference {
@@ -613,8 +630,8 @@ pub(super) mod _serde {
                 } else {
                     Some(v.metadata_log)
                 },
-                sort_orders: v.sort_orders.into_values().collect(),
-                default_sort_order_id: v.default_sort_order_id,
+                sort_orders: Some(v.sort_orders.into_values().collect()),
+                default_sort_order_id: Some(v.default_sort_order_id),
             }
         }
     }
@@ -653,7 +670,7 @@ pub struct SnapshotLog {
 #[cfg(test)]
 mod tests {
 
-    use std::{collections::HashMap, sync::Arc};
+    use std::{collections::HashMap, fs, sync::Arc};
 
     use anyhow::Result;
     use uuid::Uuid;
@@ -661,9 +678,9 @@ mod tests {
     use pretty_assertions::assert_eq;
 
     use crate::spec::{
-        table_metadata::TableMetadata, ManifestList, NestedField, Operation, 
PartitionField,
-        PartitionSpec, PrimitiveType, Schema, Snapshot, SnapshotReference, 
SnapshotRetention,
-        SortOrder, Summary, Transform, Type,
+        table_metadata::TableMetadata, ManifestList, NestedField, NullOrder, 
Operation,
+        PartitionField, PartitionSpec, PrimitiveType, Schema, Snapshot, 
SnapshotReference,
+        SnapshotRetention, SortDirection, SortField, SortOrder, Summary, 
Transform, Type,
     };
 
     use super::{FormatVersion, MetadataLog, SnapshotLog};
@@ -972,4 +989,382 @@ mod tests {
         assert!(serde_json::from_str::<TableMetadata>(data).is_err());
         Ok(())
     }
+
+    #[test]
+    fn test_table_metadata_v2_file_valid() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataV2Valid.json").unwrap();
+
+        let schema1 = Schema::builder()
+            .with_schema_id(0)
+            .with_fields(vec![Arc::new(NestedField::required(
+                1,
+                "x",
+                Type::Primitive(PrimitiveType::Long),
+            ))])
+            .build()
+            .unwrap();
+
+        let schema2 = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                Arc::new(NestedField::required(
+                    1,
+                    "x",
+                    Type::Primitive(PrimitiveType::Long),
+                )),
+                Arc::new(
+                    NestedField::required(2, "y", 
Type::Primitive(PrimitiveType::Long))
+                        .with_doc("comment"),
+                ),
+                Arc::new(NestedField::required(
+                    3,
+                    "z",
+                    Type::Primitive(PrimitiveType::Long),
+                )),
+            ])
+            .with_identifier_field_ids(vec![1, 2])
+            .build()
+            .unwrap();
+
+        let partition_spec = PartitionSpec::builder()
+            .with_spec_id(0)
+            .with_partition_field(PartitionField {
+                name: "x".to_string(),
+                transform: Transform::Identity,
+                source_id: 1,
+                field_id: 1000,
+            })
+            .build()
+            .unwrap();
+
+        let sort_order = SortOrder::builder()
+            .with_order_id(3)
+            .with_sort_field(SortField {
+                source_id: 2,
+                transform: Transform::Identity,
+                direction: SortDirection::Ascending,
+                null_order: NullOrder::First,
+            })
+            .with_sort_field(SortField {
+                source_id: 3,
+                transform: Transform::Bucket(4),
+                direction: SortDirection::Descending,
+                null_order: NullOrder::Last,
+            })
+            .build()
+            .unwrap();
+
+        let snapshot1 = Snapshot::builder()
+            .with_snapshot_id(3051729675574597004)
+            .with_timestamp_ms(1515100955770)
+            .with_sequence_number(0)
+            .with_manifest_list(ManifestList::ManifestListFile(
+                "s3://a/b/1.avro".to_string(),
+            ))
+            .with_summary(Summary {
+                operation: Operation::Append,
+                other: HashMap::new(),
+            })
+            .build()
+            .unwrap();
+
+        let snapshot2 = Snapshot::builder()
+            .with_snapshot_id(3055729675574597004)
+            .with_parent_snapshot_id(Some(3051729675574597004))
+            .with_timestamp_ms(1555100955770)
+            .with_sequence_number(1)
+            .with_schema_id(1)
+            .with_manifest_list(ManifestList::ManifestListFile(
+                "s3://a/b/2.avro".to_string(),
+            ))
+            .with_summary(Summary {
+                operation: Operation::Append,
+                other: HashMap::new(),
+            })
+            .build()
+            .unwrap();
+
+        let expected = TableMetadata {
+            format_version: FormatVersion::V2,
+            table_uuid: 
Uuid::parse_str("9c12d441-03fe-4693-9a96-a0705ddf69c1").unwrap(),
+            location: "s3://bucket/test/location".to_string(),
+            last_updated_ms: 1602638573590,
+            last_column_id: 3,
+            schemas: HashMap::from_iter(vec![(0, Arc::new(schema1)), (1, 
Arc::new(schema2))]),
+            current_schema_id: 1,
+            partition_specs: HashMap::from_iter(vec![(0, partition_spec)]),
+            default_spec_id: 0,
+            last_partition_id: 1000,
+            default_sort_order_id: 3,
+            sort_orders: HashMap::from_iter(vec![(3, sort_order)]),
+            snapshots: Some(HashMap::from_iter(vec![
+                (3051729675574597004, Arc::new(snapshot1)),
+                (3055729675574597004, Arc::new(snapshot2)),
+            ])),
+            current_snapshot_id: Some(3055729675574597004),
+            last_sequence_number: 34,
+            properties: HashMap::new(),
+            snapshot_log: vec![
+                SnapshotLog {
+                    snapshot_id: 3051729675574597004,
+                    timestamp_ms: 1515100955770,
+                },
+                SnapshotLog {
+                    snapshot_id: 3055729675574597004,
+                    timestamp_ms: 1555100955770,
+                },
+            ],
+            metadata_log: Vec::new(),
+            refs: HashMap::from_iter(vec![(
+                "main".to_string(),
+                SnapshotReference {
+                    snapshot_id: 3055729675574597004,
+                    retention: SnapshotRetention::Branch {
+                        min_snapshots_to_keep: None,
+                        max_snapshot_age_ms: None,
+                        max_ref_age_ms: None,
+                    },
+                },
+            )]),
+        };
+
+        check_table_metadata_serde(&metadata, expected);
+    }
+
+    #[test]
+    fn test_table_metadata_v2_file_valid_minimal() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataV2ValidMinimal.json").unwrap();
+
+        let schema = Schema::builder()
+            .with_schema_id(0)
+            .with_fields(vec![
+                Arc::new(NestedField::required(
+                    1,
+                    "x",
+                    Type::Primitive(PrimitiveType::Long),
+                )),
+                Arc::new(
+                    NestedField::required(2, "y", 
Type::Primitive(PrimitiveType::Long))
+                        .with_doc("comment"),
+                ),
+                Arc::new(NestedField::required(
+                    3,
+                    "z",
+                    Type::Primitive(PrimitiveType::Long),
+                )),
+            ])
+            .build()
+            .unwrap();
+
+        let partition_spec = PartitionSpec::builder()
+            .with_spec_id(0)
+            .with_partition_field(PartitionField {
+                name: "x".to_string(),
+                transform: Transform::Identity,
+                source_id: 1,
+                field_id: 1000,
+            })
+            .build()
+            .unwrap();
+
+        let sort_order = SortOrder::builder()
+            .with_order_id(3)
+            .with_sort_field(SortField {
+                source_id: 2,
+                transform: Transform::Identity,
+                direction: SortDirection::Ascending,
+                null_order: NullOrder::First,
+            })
+            .with_sort_field(SortField {
+                source_id: 3,
+                transform: Transform::Bucket(4),
+                direction: SortDirection::Descending,
+                null_order: NullOrder::Last,
+            })
+            .build()
+            .unwrap();
+
+        let expected = TableMetadata {
+            format_version: FormatVersion::V2,
+            table_uuid: 
Uuid::parse_str("9c12d441-03fe-4693-9a96-a0705ddf69c1").unwrap(),
+            location: "s3://bucket/test/location".to_string(),
+            last_updated_ms: 1602638573590,
+            last_column_id: 3,
+            schemas: HashMap::from_iter(vec![(0, Arc::new(schema))]),
+            current_schema_id: 0,
+            partition_specs: HashMap::from_iter(vec![(0, partition_spec)]),
+            default_spec_id: 0,
+            last_partition_id: 1000,
+            default_sort_order_id: 3,
+            sort_orders: HashMap::from_iter(vec![(3, sort_order)]),
+            snapshots: None,
+            current_snapshot_id: None,
+            last_sequence_number: 34,
+            properties: HashMap::new(),
+            snapshot_log: vec![],
+            metadata_log: Vec::new(),
+            refs: HashMap::new(),
+        };
+
+        check_table_metadata_serde(&metadata, expected);
+    }
+
+    #[test]
+    fn test_table_metadata_v1_file_valid() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataV1Valid.json").unwrap();
+
+        let schema = Schema::builder()
+            .with_schema_id(0)
+            .with_fields(vec![
+                Arc::new(NestedField::required(
+                    1,
+                    "x",
+                    Type::Primitive(PrimitiveType::Long),
+                )),
+                Arc::new(
+                    NestedField::required(2, "y", 
Type::Primitive(PrimitiveType::Long))
+                        .with_doc("comment"),
+                ),
+                Arc::new(NestedField::required(
+                    3,
+                    "z",
+                    Type::Primitive(PrimitiveType::Long),
+                )),
+            ])
+            .build()
+            .unwrap();
+
+        let partition_spec = PartitionSpec::builder()
+            .with_spec_id(0)
+            .with_partition_field(PartitionField {
+                name: "x".to_string(),
+                transform: Transform::Identity,
+                source_id: 1,
+                field_id: 1000,
+            })
+            .build()
+            .unwrap();
+
+        let expected = TableMetadata {
+            format_version: FormatVersion::V1,
+            table_uuid: 
Uuid::parse_str("d20125c8-7284-442c-9aea-15fee620737c").unwrap(),
+            location: "s3://bucket/test/location".to_string(),
+            last_updated_ms: 1602638573874,
+            last_column_id: 3,
+            schemas: HashMap::from_iter(vec![(0, Arc::new(schema))]),
+            current_schema_id: 0,
+            partition_specs: HashMap::from_iter(vec![(0, partition_spec)]),
+            default_spec_id: 0,
+            last_partition_id: 0,
+            default_sort_order_id: 0,
+            sort_orders: HashMap::new(),
+            snapshots: Some(HashMap::new()),
+            current_snapshot_id: None,
+            last_sequence_number: 0,
+            properties: HashMap::new(),
+            snapshot_log: vec![],
+            metadata_log: Vec::new(),
+            refs: HashMap::from_iter(vec![(
+                "main".to_string(),
+                SnapshotReference {
+                    snapshot_id: -1,
+                    retention: SnapshotRetention::Branch {
+                        min_snapshots_to_keep: None,
+                        max_snapshot_age_ms: None,
+                        max_ref_age_ms: None,
+                    },
+                },
+            )]),
+        };
+
+        check_table_metadata_serde(&metadata, expected);
+    }
+
+    #[test]
+    fn test_table_metadata_v2_schema_not_found() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json")
+                .unwrap();
+
+        let desered: Result<TableMetadata, serde_json::Error> = 
serde_json::from_str(&metadata);
+
+        assert_eq!(
+            desered.unwrap_err().to_string(),
+            "DataInvalid => No schema exists with the current schema id 2."
+        )
+    }
+
+    #[test]
+    fn test_table_metadata_v2_missing_sort_order() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataV2MissingSortOrder.json")
+                .unwrap();
+
+        let desered: Result<TableMetadata, serde_json::Error> = 
serde_json::from_str(&metadata);
+
+        assert_eq!(
+            desered.unwrap_err().to_string(),
+            "data did not match any variant of untagged enum TableMetadataEnum"
+        )
+    }
+
+    #[test]
+    fn test_table_metadata_v2_missing_partition_specs() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json")
+                .unwrap();
+
+        let desered: Result<TableMetadata, serde_json::Error> = 
serde_json::from_str(&metadata);
+
+        assert_eq!(
+            desered.unwrap_err().to_string(),
+            "data did not match any variant of untagged enum TableMetadataEnum"
+        )
+    }
+
+    #[test]
+    fn test_table_metadata_v2_missing_last_partition_id() {
+        let metadata = fs::read_to_string(
+            
"testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json",
+        )
+        .unwrap();
+
+        let desered: Result<TableMetadata, serde_json::Error> = 
serde_json::from_str(&metadata);
+
+        assert_eq!(
+            desered.unwrap_err().to_string(),
+            "data did not match any variant of untagged enum TableMetadataEnum"
+        )
+    }
+
+    #[test]
+    fn test_table_metadata_v2_missing_schemas() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataV2MissingSchemas.json")
+                .unwrap();
+
+        let desered: Result<TableMetadata, serde_json::Error> = 
serde_json::from_str(&metadata);
+
+        assert_eq!(
+            desered.unwrap_err().to_string(),
+            "data did not match any variant of untagged enum TableMetadataEnum"
+        )
+    }
+
+    #[test]
+    fn test_table_metadata_v2_unsupported_version() {
+        let metadata =
+            
fs::read_to_string("testdata/table_metadata/TableMetadataUnsupportedVersion.json")
+                .unwrap();
+
+        let desered: Result<TableMetadata, serde_json::Error> = 
serde_json::from_str(&metadata);
+
+        assert_eq!(
+            desered.unwrap_err().to_string(),
+            "data did not match any variant of untagged enum TableMetadataEnum"
+        )
+    }
 }
diff --git 
a/crates/iceberg/testdata/table_metadata/TableMetadataUnsupportedVersion.json 
b/crates/iceberg/testdata/table_metadata/TableMetadataUnsupportedVersion.json
new file mode 100644
index 0000000..0633a71
--- /dev/null
+++ 
b/crates/iceberg/testdata/table_metadata/TableMetadataUnsupportedVersion.json
@@ -0,0 +1,36 @@
+{
+  "format-version": 3,
+  "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
+  "location": "s3://bucket/test/location",
+  "last-updated-ms": 1602638573874,
+  "last-sequence-number": 0,
+  "last-column-id": 3,
+  "schema": {
+    "type": "struct",
+    "fields": [
+      {
+        "id": 1,
+        "name": "x",
+        "required": true,
+        "type": "long"
+      },
+      {
+        "id": 2,
+        "name": "y",
+        "required": true,
+        "type": "long",
+        "doc": "comment"
+      },
+      {
+        "id": 3,
+        "name": "z",
+        "required": true,
+        "type": "long"
+      }
+    ]
+  },
+  "partition-spec": [],
+  "properties": {},
+  "current-snapshot-id": -1,
+  "snapshots": []
+}
\ No newline at end of file
diff --git a/crates/iceberg/testdata/table_metadata/TableMetadataV1Valid.json 
b/crates/iceberg/testdata/table_metadata/TableMetadataV1Valid.json
new file mode 100644
index 0000000..0b55d51
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV1Valid.json
@@ -0,0 +1,42 @@
+{
+  "format-version": 1,
+  "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
+  "location": "s3://bucket/test/location",
+  "last-updated-ms": 1602638573874,
+  "last-column-id": 3,
+  "schema": {
+    "type": "struct",
+    "fields": [
+      {
+        "id": 1,
+        "name": "x",
+        "required": true,
+        "type": "long"
+      },
+      {
+        "id": 2,
+        "name": "y",
+        "required": true,
+        "type": "long",
+        "doc": "comment"
+      },
+      {
+        "id": 3,
+        "name": "z",
+        "required": true,
+        "type": "long"
+      }
+    ]
+  },
+  "partition-spec": [
+    {
+      "name": "x",
+      "transform": "identity",
+      "source-id": 1,
+      "field-id": 1000
+    }
+  ],
+  "properties": {},
+  "current-snapshot-id": -1,
+  "snapshots": []
+}
\ No newline at end of file
diff --git 
a/crates/iceberg/testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json
 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json
new file mode 100644
index 0000000..d010785
--- /dev/null
+++ 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json
@@ -0,0 +1,88 @@
+{
+  "format-version": 2,
+  "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+  "location": "s3://bucket/test/location",
+  "last-sequence-number": 34,
+  "last-updated-ms": 1602638573590,
+  "last-column-id": 3,
+  "current-schema-id": 2,
+  "schemas": [
+    {
+      "type": "struct",
+      "schema-id": 0,
+      "fields": [
+        {
+          "id": 1,
+          "name": "x",
+          "required": true,
+          "type": "long"
+        }
+      ]
+    },
+    {
+      "type": "struct",
+      "schema-id": 1,
+      "fields": [
+        {
+          "id": 1,
+          "name": "x",
+          "required": true,
+          "type": "long"
+        },
+        {
+          "id": 2,
+          "name": "y",
+          "required": true,
+          "type": "long",
+          "doc": "comment"
+        },
+        {
+          "id": 3,
+          "name": "z",
+          "required": true,
+          "type": "long"
+        }
+      ]
+    }
+  ],
+  "default-spec-id": 0,
+  "partition-specs": [
+    {
+      "spec-id": 0,
+      "fields": [
+        {
+          "name": "x",
+          "transform": "identity",
+          "source-id": 1,
+          "field-id": 1000
+        }
+      ]
+    }
+  ],
+  "last-partition-id": 1000,
+  "default-sort-order-id": 3,
+  "sort-orders": [
+    {
+      "order-id": 3,
+      "fields": [
+        {
+          "transform": "identity",
+          "source-id": 2,
+          "direction": "asc",
+          "null-order": "nulls-first"
+        },
+        {
+          "transform": "bucket[4]",
+          "source-id": 3,
+          "direction": "desc",
+          "null-order": "nulls-last"
+        }
+      ]
+    }
+  ],
+  "properties": {},
+  "current-snapshot-id": -1,
+  "snapshots": [],
+  "snapshot-log": [],
+  "metadata-log": []
+}
\ No newline at end of file
diff --git 
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json
 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json
new file mode 100644
index 0000000..31c2b4c
--- /dev/null
+++ 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json
@@ -0,0 +1,73 @@
+{
+  "format-version": 2,
+  "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+  "location": "s3://bucket/test/location",
+  "last-sequence-number": 34,
+  "last-updated-ms": 1602638573590,
+  "last-column-id": 3,
+  "current-schema-id": 0,
+  "schemas": [{
+    "type": "struct",
+    "schema-id": 0,
+    "fields": [
+      {
+        "id": 1,
+        "name": "x",
+        "required": true,
+        "type": "long"
+      },
+      {
+        "id": 2,
+        "name": "y",
+        "required": true,
+        "type": "long",
+        "doc": "comment"
+      },
+      {
+        "id": 3,
+        "name": "z",
+        "required": true,
+        "type": "long"
+      }
+    ]
+  }],
+  "default-spec-id": 0,
+  "partition-specs": [
+    {
+      "spec-id": 0,
+      "fields": [
+        {
+          "name": "x",
+          "transform": "identity",
+          "source-id": 1,
+          "field-id": 1000
+        }
+      ]
+    }
+  ],
+  "default-sort-order-id": 3,
+  "sort-orders": [
+    {
+      "order-id": 3,
+      "fields": [
+        {
+          "transform": "identity",
+          "source-id": 2,
+          "direction": "asc",
+          "null-order": "nulls-first"
+        },
+        {
+          "transform": "bucket[4]",
+          "source-id": 3,
+          "direction": "desc",
+          "null-order": "nulls-last"
+        }
+      ]
+    }
+  ],
+  "properties": {},
+  "current-snapshot-id": -1,
+  "snapshots": [],
+  "snapshot-log": [],
+  "metadata-log": []
+}
\ No newline at end of file
diff --git 
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json
 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json
new file mode 100644
index 0000000..3ab0a7a
--- /dev/null
+++ 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json
@@ -0,0 +1,67 @@
+{
+  "format-version": 2,
+  "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+  "location": "s3://bucket/test/location",
+  "last-sequence-number": 34,
+  "last-updated-ms": 1602638573590,
+  "last-column-id": 3,
+  "current-schema-id": 0,
+  "schemas": [{
+    "type": "struct",
+    "schema-id": 0,
+    "fields": [
+      {
+        "id": 1,
+        "name": "x",
+        "required": true,
+        "type": "long"
+      },
+      {
+        "id": 2,
+        "name": "y",
+        "required": true,
+        "type": "long",
+        "doc": "comment"
+      },
+      {
+        "id": 3,
+        "name": "z",
+        "required": true,
+        "type": "long"
+      }
+    ]
+  }],
+  "partition-spec": [
+    {
+      "name": "x",
+      "transform": "identity",
+      "source-id": 1,
+      "field-id": 1000
+    }
+  ],
+  "default-sort-order-id": 3,
+  "sort-orders": [
+    {
+      "order-id": 3,
+      "fields": [
+        {
+          "transform": "identity",
+          "source-id": 2,
+          "direction": "asc",
+          "null-order": "nulls-first"
+        },
+        {
+          "transform": "bucket[4]",
+          "source-id": 3,
+          "direction": "desc",
+          "null-order": "nulls-last"
+        }
+      ]
+    }
+  ],
+  "properties": {},
+  "current-snapshot-id": -1,
+  "snapshots": [],
+  "snapshot-log": [],
+  "metadata-log": []
+}
\ No newline at end of file
diff --git 
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSchemas.json 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSchemas.json
new file mode 100644
index 0000000..3754354
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSchemas.json
@@ -0,0 +1,71 @@
+{
+  "format-version": 2,
+  "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+  "location": "s3://bucket/test/location",
+  "last-sequence-number": 34,
+  "last-updated-ms": 1602638573590,
+  "last-column-id": 3,
+  "schema": {
+    "type": "struct",
+    "fields": [
+      {
+        "id": 1,
+        "name": "x",
+        "required": true,
+        "type": "long"
+      },
+      {
+        "id": 2,
+        "name": "y",
+        "required": true,
+        "type": "long",
+        "doc": "comment"
+      },
+      {
+        "id": 3,
+        "name": "z",
+        "required": true,
+        "type": "long"
+      }
+    ]
+  },
+  "default-spec-id": 0,
+  "partition-specs": [
+    {
+      "spec-id": 0,
+      "fields": [
+        {
+          "name": "x",
+          "transform": "identity",
+          "source-id": 1,
+          "field-id": 1000
+        }
+      ]
+    }
+  ],
+  "default-sort-order-id": 3,
+  "sort-orders": [
+    {
+      "order-id": 3,
+      "fields": [
+        {
+          "transform": "identity",
+          "source-id": 2,
+          "direction": "asc",
+          "null-order": "nulls-first"
+        },
+        {
+          "transform": "bucket[4]",
+          "source-id": 3,
+          "direction": "desc",
+          "null-order": "nulls-last"
+        }
+      ]
+    }
+  ],
+  "properties": {},
+  "current-snapshot-id": -1,
+  "snapshots": [],
+  "snapshot-log": [],
+  "metadata-log": []
+}
\ No newline at end of file
diff --git 
a/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSortOrder.json 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSortOrder.json
new file mode 100644
index 0000000..fbbcf41
--- /dev/null
+++ 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSortOrder.json
@@ -0,0 +1,54 @@
+{
+  "format-version": 2,
+  "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+  "location": "s3://bucket/test/location",
+  "last-sequence-number": 34,
+  "last-updated-ms": 1602638573590,
+  "last-column-id": 3,
+  "current-schema-id": 0,
+  "schemas": [{
+    "type": "struct",
+    "schema-id": 0,
+    "fields": [
+      {
+        "id": 1,
+        "name": "x",
+        "required": true,
+        "type": "long"
+      },
+      {
+        "id": 2,
+        "name": "y",
+        "required": true,
+        "type": "long",
+        "doc": "comment"
+      },
+      {
+        "id": 3,
+        "name": "z",
+        "required": true,
+        "type": "long"
+      }
+    ]
+  }],
+  "default-spec-id": 0,
+  "partition-specs": [
+    {
+      "spec-id": 0,
+      "fields": [
+        {
+          "name": "x",
+          "transform": "identity",
+          "source-id": 1,
+          "field-id": 1000
+        }
+      ]
+    }
+  ],
+  "last-partition-id": 1000,
+  "properties": {},
+  "current-snapshot-id": -1,
+  "snapshots": [],
+  "snapshot-log": [],
+  "metadata-log": []
+}
\ No newline at end of file
diff --git a/crates/iceberg/testdata/table_metadata/TableMetadataV2Valid.json 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2Valid.json
new file mode 100644
index 0000000..0dc89de
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV2Valid.json
@@ -0,0 +1,122 @@
+{
+  "format-version": 2,
+  "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+  "location": "s3://bucket/test/location",
+  "last-sequence-number": 34,
+  "last-updated-ms": 1602638573590,
+  "last-column-id": 3,
+  "current-schema-id": 1,
+  "schemas": [
+    {
+      "type": "struct",
+      "schema-id": 0,
+      "fields": [
+        {
+          "id": 1,
+          "name": "x",
+          "required": true,
+          "type": "long"
+        }
+      ]
+    },
+    {
+      "type": "struct",
+      "schema-id": 1,
+      "identifier-field-ids": [
+        1,
+        2
+      ],
+      "fields": [
+        {
+          "id": 1,
+          "name": "x",
+          "required": true,
+          "type": "long"
+        },
+        {
+          "id": 2,
+          "name": "y",
+          "required": true,
+          "type": "long",
+          "doc": "comment"
+        },
+        {
+          "id": 3,
+          "name": "z",
+          "required": true,
+          "type": "long"
+        }
+      ]
+    }
+  ],
+  "default-spec-id": 0,
+  "partition-specs": [
+    {
+      "spec-id": 0,
+      "fields": [
+        {
+          "name": "x",
+          "transform": "identity",
+          "source-id": 1,
+          "field-id": 1000
+        }
+      ]
+    }
+  ],
+  "last-partition-id": 1000,
+  "default-sort-order-id": 3,
+  "sort-orders": [
+    {
+      "order-id": 3,
+      "fields": [
+        {
+          "transform": "identity",
+          "source-id": 2,
+          "direction": "asc",
+          "null-order": "nulls-first"
+        },
+        {
+          "transform": "bucket[4]",
+          "source-id": 3,
+          "direction": "desc",
+          "null-order": "nulls-last"
+        }
+      ]
+    }
+  ],
+  "properties": {},
+  "current-snapshot-id": 3055729675574597004,
+  "snapshots": [
+    {
+      "snapshot-id": 3051729675574597004,
+      "timestamp-ms": 1515100955770,
+      "sequence-number": 0,
+      "summary": {
+        "operation": "append"
+      },
+      "manifest-list": "s3://a/b/1.avro"
+    },
+    {
+      "snapshot-id": 3055729675574597004,
+      "parent-snapshot-id": 3051729675574597004,
+      "timestamp-ms": 1555100955770,
+      "sequence-number": 1,
+      "summary": {
+        "operation": "append"
+      },
+      "manifest-list": "s3://a/b/2.avro",
+      "schema-id": 1
+    }
+  ],
+  "snapshot-log": [
+    {
+      "snapshot-id": 3051729675574597004,
+      "timestamp-ms": 1515100955770
+    },
+    {
+      "snapshot-id": 3055729675574597004,
+      "timestamp-ms": 1555100955770
+    }
+  ],
+  "metadata-log": []
+}
\ No newline at end of file
diff --git 
a/crates/iceberg/testdata/table_metadata/TableMetadataV2ValidMinimal.json 
b/crates/iceberg/testdata/table_metadata/TableMetadataV2ValidMinimal.json
new file mode 100644
index 0000000..529b10d
--- /dev/null
+++ b/crates/iceberg/testdata/table_metadata/TableMetadataV2ValidMinimal.json
@@ -0,0 +1,71 @@
+{
+  "format-version": 2,
+  "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+  "location": "s3://bucket/test/location",
+  "last-sequence-number": 34,
+  "last-updated-ms": 1602638573590,
+  "last-column-id": 3,
+  "current-schema-id": 0,
+  "schemas": [
+    {
+      "type": "struct",
+      "schema-id": 0,
+      "fields": [
+        {
+          "id": 1,
+          "name": "x",
+          "required": true,
+          "type": "long"
+        },
+        {
+          "id": 2,
+          "name": "y",
+          "required": true,
+          "type": "long",
+          "doc": "comment"
+        },
+        {
+          "id": 3,
+          "name": "z",
+          "required": true,
+          "type": "long"
+        }
+      ]
+    }
+  ],
+  "default-spec-id": 0,
+  "partition-specs": [
+    {
+      "spec-id": 0,
+      "fields": [
+        {
+          "name": "x",
+          "transform": "identity",
+          "source-id": 1,
+          "field-id": 1000
+        }
+      ]
+    }
+  ],
+  "last-partition-id": 1000,
+  "default-sort-order-id": 3,
+  "sort-orders": [
+    {
+      "order-id": 3,
+      "fields": [
+        {
+          "transform": "identity",
+          "source-id": 2,
+          "direction": "asc",
+          "null-order": "nulls-first"
+        },
+        {
+          "transform": "bucket[4]",
+          "source-id": 3,
+          "direction": "desc",
+          "null-order": "nulls-last"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file


Reply via email to