liurenjie1024 commented on code in PR #25:
URL: https://github.com/apache/iceberg-rust/pull/25#discussion_r1288061943


##########
crates/iceberg/src/spec/schema.rs:
##########
@@ -48,14 +68,98 @@ impl SchemaBuilder {
         self
     }
 
+    /// Set identifier field ids.
+    pub fn with_identifier_field_ids(mut self, ids: impl IntoIterator<Item = 
i32>) -> Self {
+        self.identifier_field_ids.extend(ids);
+        self
+    }
+
+    /// Set alias to filed id mapping.
+    pub fn with_alias(mut self, alias_to_id: BiHashMap<String, i32>) -> Self {
+        self.alias_to_id = alias_to_id;
+        self
+    }
+
     /// Builds the schema.
-    pub fn build(self) -> Schema {
+    pub fn build(self) -> Result<Schema> {
         let highest_field_id = self.fields.iter().map(|f| 
f.id).max().unwrap_or(0);
-        Schema {
-            r#struct: StructType::new(self.fields),
+
+        let r#struct = StructType::new(self.fields);
+        let id_to_field = index_by_id(&r#struct)?;
+
+        Self::validate_identifier_ids(
+            &r#struct,
+            &id_to_field,
+            self.identifier_field_ids.iter().copied(),
+        )?;
+
+        let (name_to_id, id_to_name) = {
+            let mut index = IndexByName::default();
+            visit_struct(&r#struct, &mut index)?;
+            index.indexes()
+        };
+
+        Ok(Schema {
+            r#struct,
             schema_id: self.schema_id,
             highest_field_id,
+            identifier_field_ids: self.identifier_field_ids,
+
+            alias_to_id: self.alias_to_id,
+            id_to_field,
+
+            name_to_id,
+            id_to_name,
+            lower_case_name_to_id: OnceCell::default(),
+        })
+    }
+
+    fn validate_identifier_ids(
+        r#struct: &StructType,
+        id_to_field: &HashMap<i32, NestedFieldRef>,
+        identifier_field_ids: impl Iterator<Item = i32>,
+    ) -> Result<()> {
+        let id_to_parent = index_parents(r#struct);
+        for identifier_field_id in identifier_field_ids {
+            let field = id_to_field.get(&identifier_field_id)
+                .ok_or_else(|| Error::new(ErrorKind::DataInvalid, 
format!("Cannot add fieldId {identifier_field_id} as an identifier field: field 
does not exist") ))?;
+            ensure_data_valid!(
+                field.required,
+                "Cannot add field {} as an identifier field: not a required 
field",
+                field.name
+            );
+            if let Type::Primitive(p) = field.field_type.as_ref() {
+                ensure_data_valid!(
+                    !matches!(p, PrimitiveType::Double | PrimitiveType::Float),
+                    "Cannot add field {} as an identifier field: must not be 
float or double field",
+                    field.name
+                );
+            } else {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Cannot add field {} as an identifier field: not a 
primitive type field",
+                        field.name
+                    ),
+                ));
+            }
+
+            let mut cur_field_id = identifier_field_id;
+            while let Some(parent) = id_to_parent.get(&cur_field_id) {
+                let parent_field = id_to_field.get(parent)
+                    .ok_or_else(|| Error::new(ErrorKind::DataInvalid, 
format!("Cannot add fieldId {identifier_field_id} as an identifier field: 
parent field does not exist") ))?;
+                ensure_data_valid!(
+                    parent_field.field_type.is_struct(),
+                    "Cannot add field {} as an identifier field: must not be 
nested in {:?}",

Review Comment:
   I don't get your point here, a field may be deeply nested: `Struct { List { 
Struct { Field }}}`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to