c-thiel commented on code in PR #2188:
URL: https://github.com/apache/iceberg-rust/pull/2188#discussion_r3363021501
##########
crates/iceberg/src/spec/schema/mod.rs:
##########
@@ -421,6 +425,85 @@ impl Schema {
pub fn field_id_to_fields(&self) -> &HashMap<i32, NestedFieldRef> {
&self.id_to_field
}
+
+ /// Minimum [`FormatVersion`] required to represent all *types* in this
schema.
+ ///
+ /// Types only; for initial-default version floors see
[`Schema::check_format_compatibility`].
+ pub fn min_format_version(&self) -> FormatVersion {
+ // `id_to_field` is flattened, so the max over all fields covers
nested ones too.
+ self.id_to_field
+ .values()
+ .map(|f| leaf_min_format_version(&f.field_type))
+ .max()
+ .unwrap_or(FormatVersion::V1)
+ }
+
+ /// Returns an error listing every field incompatible with
`format_version`.
+ /// Mirrors Java's `Schema.checkCompatibility()`. Two checks per field:
+ ///
+ /// - **Type** — per `leaf_min_format_version`.
+ /// - **Initial default** — a non-null `initial_default` backfills
pre-existing rows,
+ /// so it requires [`MIN_FORMAT_VERSION_DEFAULT_VALUES`];
`write_default` is not
+ /// checked, as it only affects newly written rows (read identically at
any version).
+ pub fn check_format_compatibility(&self, format_version: FormatVersion) ->
Result<()> {
+ // (field id, message); sorted by id below for a deterministic error.
+ let mut problems: Vec<(i32, String)> = Vec::new();
+
+ // `id_to_field` is flattened, so checking each field by its own type
keeps the
+ // blame on the offending leaf, not its container (mirrors Java's
`lazyIdToField`).
+ for field in self.id_to_field.values() {
+ let min_version = leaf_min_format_version(&field.field_type);
+ if format_version < min_version {
+ let name = self
+ .name_by_field_id(field.id)
+ .unwrap_or(field.name.as_str());
+ problems.push((field.id, format!(
+ "Invalid type for {name}: {} is not supported until
{min_version} but format version is {format_version}.",
+ field.field_type,
+ )));
+ }
+
+ if let Some(default) = &field.initial_default
+ && format_version < MIN_FORMAT_VERSION_DEFAULT_VALUES
+ {
+ let name = self
+ .name_by_field_id(field.id)
+ .unwrap_or(field.name.as_str());
+ problems.push((field.id, format!(
+ "Invalid initial default for {name}: non-null default
({default:?}) is not supported until {MIN_FORMAT_VERSION_DEFAULT_VALUES} but
format version is {format_version}."
+ )));
+ }
+ }
+
+ if problems.is_empty() {
+ return Ok(());
+ }
+
+ // Stable sort by id: HashMap order is nondeterministic, and stability
keeps a
+ // field's type problem before its default problem (matches Java's
TreeMap order).
+ let message = problems
+ .into_iter()
+ .sorted_by_key(|(id, _)| *id)
+ .map(|(_, msg)| msg)
+ .join("\n- ");
+ Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("Invalid schema for {format_version}:\n- {message}"),
+ ))
+ }
+}
+
+/// Minimum [`FormatVersion`] required by a type itself, ignoring nested
fields.
+///
+/// `TimestampNs` / `TimestamptzNs` / `Variant` require v3; everything else
(including
+/// nested types, validated per-leaf elsewhere) is valid from v1. Single
source of truth
+/// for the type version rules, mirroring Java's `Schema.MIN_FORMAT_VERSIONS`.
+fn leaf_min_format_version(field_type: &Type) -> FormatVersion {
Review Comment:
Commit
https://github.com/apache/iceberg-rust/pull/2188/commits/15884b99c2faffd770c00865280bbe574b0c526e
moved the shallow per-type rule onto `Type::min_format_version`, as it was
originally, but now with the recursion fix from the last review.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]