klion26 commented on code in PR #7451:
URL: https://github.com/apache/arrow-rs/pull/7451#discussion_r2065764967


##########
arrow-avro/src/codec.rs:
##########
@@ -284,9 +313,14 @@ fn make_data_type<'a>(
             ComplexType::Enum(e) => Err(ArrowError::NotYetImplemented(format!(
                 "Enum of {e:?} not currently supported"
             ))),
-            ComplexType::Map(m) => Err(ArrowError::NotYetImplemented(format!(
-                "Map of {m:?} not currently supported"
-            ))),
+            ComplexType::Map(m) => {
+                let val = make_data_type(&m.values, namespace, resolver)?;
+                Ok(AvroDataType {
+                    nullability: None,

Review Comment:
   Do we need to set the `nullability` to `val.nullability`?



##########
arrow-avro/src/codec.rs:
##########
@@ -155,6 +168,22 @@ impl Codec {
                 
DataType::List(Arc::new(f.field_with_name(Field::LIST_FIELD_DEFAULT_NAME)))
             }
             Self::Struct(f) => DataType::Struct(f.iter().map(|x| 
x.field()).collect()),
+            Self::Map(value_type) => {
+                let val_dt = value_type.codec.data_type();
+                let val_field = Field::new("value", val_dt, true)

Review Comment:
   Does the `nullability` set to `value_type.nullability.is_some()`



##########
arrow-avro/src/reader/record.rs:
##########
@@ -290,3 +401,84 @@ fn flush_primitive<T: ArrowPrimitiveType>(
 }
 
 const DEFAULT_CAPACITY: usize = 1024;
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::{
+        cast::AsArray, Array, Decimal128Array, DictionaryArray, 
FixedSizeBinaryArray,
+        IntervalMonthDayNanoArray, ListArray, MapArray, StringArray, 
StructArray,
+    };
+
+    fn encode_avro_long(value: i64) -> Vec<u8> {
+        let mut buf = Vec::new();
+        let mut v = (value << 1) ^ (value >> 63);
+        while v & !0x7F != 0 {
+            buf.push(((v & 0x7F) | 0x80) as u8);
+            v >>= 7;
+        }
+        buf.push(v as u8);
+        buf
+    }
+
+    fn encode_avro_bytes(bytes: &[u8]) -> Vec<u8> {
+        let mut buf = encode_avro_long(bytes.len() as i64);
+        buf.extend_from_slice(bytes);
+        buf
+    }
+
+    fn avro_from_codec(codec: Codec) -> AvroDataType {
+        AvroDataType::new(codec, None, Default::default())

Review Comment:
   Not sure if we need to test the `nullability` to `Some(x)`



##########
arrow-avro/src/reader/record.rs:
##########
@@ -267,10 +305,83 @@ impl Decoder {
                     .collect::<Result<Vec<_>, _>>()?;
                 Arc::new(StructArray::new(fields.clone(), arrays, nulls))
             }
+            Self::Map(map_field, k_off, m_off, kdata, valdec) => {
+                let moff = flush_offsets(m_off);
+                let koff = flush_offsets(k_off);
+                let kd = flush_values(kdata).into();
+                let val_arr = valdec.flush(None)?;
+                let key_arr = StringArray::new(koff, kd, None);
+                if key_arr.len() != val_arr.len() {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Map keys length ({}) != map values length ({})",
+                        key_arr.len(),
+                        val_arr.len()
+                    )));
+                }
+                let final_len = moff.len() - 1;
+                if let Some(n) = &nulls {
+                    if n.len() != final_len {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Map array null buffer length {} != final map 
length {final_len}",
+                            n.len()
+                        )));
+                    }
+                }
+                let entries_struct = StructArray::new(
+                    Fields::from(vec![
+                        Arc::new(ArrowField::new("key", DataType::Utf8, 
false)),
+                        Arc::new(ArrowField::new("value", 
val_arr.data_type().clone(), true)),
+                    ]),
+                    vec![Arc::new(key_arr), val_arr],
+                    None,
+                );
+                let map_arr = MapArray::new(map_field.clone(), moff, 
entries_struct, nulls, false);
+                Arc::new(map_arr)
+            }
         })
     }
 }
 
+
+fn read_map_blocks(
+    buf: &mut AvroCursor,
+    decode_entry: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
+) -> Result<usize, ArrowError> {
+    read_blockwise_items(buf, true, decode_entry)
+}
+
+fn read_blockwise_items(
+    buf: &mut AvroCursor,
+    read_size_after_negative: bool,
+    mut decode_fn: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
+) -> Result<usize, ArrowError> {
+    let mut total = 0usize;
+    loop {
+        let blk = buf.get_long()?;

Review Comment:
   It seems `blk` is the number of items needed to be read. Is there any case 
where `blk` will be negative?



##########
arrow-avro/src/codec.rs:
##########
@@ -155,6 +168,22 @@ impl Codec {
                 
DataType::List(Arc::new(f.field_with_name(Field::LIST_FIELD_DEFAULT_NAME)))
             }
             Self::Struct(f) => DataType::Struct(f.iter().map(|x| 
x.field()).collect()),
+            Self::Map(value_type) => {
+                let val_dt = value_type.codec.data_type();
+                let val_field = Field::new("value", val_dt, true)

Review Comment:
   Read the 
[spec](https://avro.apache.org/docs/1.11.1/specification/#schema-declaration), 
did not find that if the value for `Map` can be null or not, but asked 
Deepseek, it says that the value can be null if the schema supports it(use 
`Unions` such as `["null", "string"]`), else it can't be null.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to