Xuanwo commented on code in PR #12:
URL: https://github.com/apache/iceberg-rust/pull/12#discussion_r1274664933
##########
src/spec/datatypes.rs:
##########
@@ -190,21 +194,87 @@ impl fmt::Display for PrimitiveType {
}
/// DataType for a specific struct
-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[derive(Debug, Serialize, PartialEq, Eq, Clone)]
#[serde(rename = "struct", tag = "type")]
pub struct StructType {
/// Struct fields
fields: Vec<StructField>,
+ /// Lookup for index by field id
+ #[serde(skip_serializing)]
Review Comment:
*I have to say this is why I want an in-memory representation :cry:. Since we
have already reached a consensus in the previous discussion, I will not
initiate a new round here.*
##########
src/spec/datatypes.rs:
##########
@@ -190,21 +194,87 @@ impl fmt::Display for PrimitiveType {
}
/// DataType for a specific struct
-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[derive(Debug, Serialize, PartialEq, Eq, Clone)]
#[serde(rename = "struct", tag = "type")]
pub struct StructType {
/// Struct fields
fields: Vec<StructField>,
+ /// Lookup for index by field id
+ #[serde(skip_serializing)]
+ id_lookup: BTreeMap<i32, usize>,
+ /// Lookup for index by field name
+ #[serde(skip_serializing)]
+ name_lookup: HashMap<String, usize>,
+}
+
+impl<'de> Deserialize<'de> for StructType {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ #[derive(Deserialize)]
+ #[serde(field_identifier, rename_all = "lowercase")]
+ enum Field {
+ Type,
+ Fields,
+ }
+
+ struct StructTypeVisitor;
+
+ impl<'de> Visitor<'de> for StructTypeVisitor {
+ type Value = StructType;
+
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result
{
+ formatter.write_str("struct")
+ }
+
+ fn visit_map<V>(self, mut map: V) -> Result<StructType, V::Error>
+ where
+ V: MapAccess<'de>,
+ {
+ let mut fields = None;
+ while let Some(key) = map.next_key()? {
+ match key {
+ Field::Type => (),
+ Field::Fields => {
+ if fields.is_some() {
+ return
Err(de::Error::duplicate_field("fields"));
+ }
+ fields = Some(map.next_value()?);
+ }
+ }
+ }
+ let fields: Vec<StructField> =
+ fields.ok_or_else(|| de::Error::missing_field("fields"))?;
+
+ Ok(StructType::new(fields))
+ }
+ }
+
+ const FIELDS: &[&str] = &["type", "fields"];
+ deserializer.deserialize_struct("struct", FIELDS, StructTypeVisitor)
+ }
}
impl StructType {
+ ///
+ pub fn new(fields: Vec<StructField>) -> Self {
+ let id_lookup = BTreeMap::from_iter(fields.iter().enumerate().map(|(i,
x)| (x.id, i)));
+ let name_lookup =
+ HashMap::from_iter(fields.iter().enumerate().map(|(i, x)|
(x.name.clone(), i)));
+ Self {
+ fields,
+ id_lookup,
+ name_lookup,
+ }
+ }
/// Get structfield with certain id
- pub fn get(&self, id: usize) -> Option<&StructField> {
- self.fields.iter().find(|field| field.id as usize == id)
+ pub fn get(&self, id: i32) -> Option<&StructField> {
Review Comment:
`get` is a bit confusing for type `StructType`. How about using `get_field`
and `get_field_by_name`?
##########
src/spec/datatypes.rs:
##########
@@ -190,21 +194,87 @@ impl fmt::Display for PrimitiveType {
}
/// DataType for a specific struct
-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[derive(Debug, Serialize, PartialEq, Eq, Clone)]
#[serde(rename = "struct", tag = "type")]
pub struct StructType {
/// Struct fields
fields: Vec<StructField>,
+ /// Lookup for index by field id
+ #[serde(skip_serializing)]
+ id_lookup: BTreeMap<i32, usize>,
+ /// Lookup for index by field name
+ #[serde(skip_serializing)]
+ name_lookup: HashMap<String, usize>,
+}
+
+impl<'de> Deserialize<'de> for StructType {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ #[derive(Deserialize)]
+ #[serde(field_identifier, rename_all = "lowercase")]
+ enum Field {
+ Type,
+ Fields,
+ }
+
+ struct StructTypeVisitor;
+
+ impl<'de> Visitor<'de> for StructTypeVisitor {
+ type Value = StructType;
+
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result
{
+ formatter.write_str("struct")
+ }
+
+ fn visit_map<V>(self, mut map: V) -> Result<StructType, V::Error>
+ where
+ V: MapAccess<'de>,
+ {
+ let mut fields = None;
+ while let Some(key) = map.next_key()? {
+ match key {
+ Field::Type => (),
+ Field::Fields => {
+ if fields.is_some() {
+ return
Err(de::Error::duplicate_field("fields"));
+ }
+ fields = Some(map.next_value()?);
+ }
+ }
+ }
+ let fields: Vec<StructField> =
+ fields.ok_or_else(|| de::Error::missing_field("fields"))?;
+
+ Ok(StructType::new(fields))
+ }
+ }
+
+ const FIELDS: &[&str] = &["type", "fields"];
+ deserializer.deserialize_struct("struct", FIELDS, StructTypeVisitor)
+ }
}
impl StructType {
+ ///
Review Comment:
The doc comment text is missing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]