jecsand838 commented on code in PR #7451:
URL: https://github.com/apache/arrow-rs/pull/7451#discussion_r2069363906


##########
arrow-avro/src/reader/record.rs:
##########
@@ -267,10 +305,83 @@ impl Decoder {
                     .collect::<Result<Vec<_>, _>>()?;
                 Arc::new(StructArray::new(fields.clone(), arrays, nulls))
             }
+            Self::Map(map_field, k_off, m_off, kdata, valdec) => {
+                let moff = flush_offsets(m_off);
+                let koff = flush_offsets(k_off);
+                let kd = flush_values(kdata).into();
+                let val_arr = valdec.flush(None)?;
+                let key_arr = StringArray::new(koff, kd, None);
+                if key_arr.len() != val_arr.len() {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Map keys length ({}) != map values length ({})",
+                        key_arr.len(),
+                        val_arr.len()
+                    )));
+                }
+                let final_len = moff.len() - 1;
+                if let Some(n) = &nulls {
+                    if n.len() != final_len {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Map array null buffer length {} != final map 
length {final_len}",
+                            n.len()
+                        )));
+                    }
+                }
+                let entries_struct = StructArray::new(
+                    Fields::from(vec![
+                        Arc::new(ArrowField::new("key", DataType::Utf8, 
false)),
+                        Arc::new(ArrowField::new("value", 
val_arr.data_type().clone(), true)),
+                    ]),
+                    vec![Arc::new(key_arr), val_arr],
+                    None,
+                );
+                let map_arr = MapArray::new(map_field.clone(), moff, 
entries_struct, nulls, false);
+                Arc::new(map_arr)
+            }
         })
     }
 }
 
+
+fn read_map_blocks(
+    buf: &mut AvroCursor,
+    decode_entry: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
+) -> Result<usize, ArrowError> {
+    read_blockwise_items(buf, true, decode_entry)
+}
+
+fn read_blockwise_items(
+    buf: &mut AvroCursor,
+    read_size_after_negative: bool,
+    mut decode_fn: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
+) -> Result<usize, ArrowError> {
+    let mut total = 0usize;
+    loop {
+        let blk = buf.get_long()?;

Review Comment:
   @klion26 
   
   `blk` is the block count, and it can legitimately be negative. 
   
   A negative `blk` is permitted by the Avro spec for block-encoded arrays & 
maps: it means the block contains `-blk` items and that the count is followed 
immediately by a long giving the block's size in bytes. Avro decoders usually 
handle this by reading that block-size value and then proceeding to decode 
`|blk|` entries in that block. After finishing the block, decoding continues 
with the next block count, until a count of 0 terminates the sequence.
   
   Here's the text from the [Avro 
Specification](https://avro.apache.org/docs/1.11.1/specification/#maps) 
regarding maps with negative block counts:
   
   > If a block’s count is negative, its absolute value is used, and the count 
is followed immediately by a long block size indicating the number of bytes in 
the block. This block size permits fast skipping through data, e.g., when 
projecting a record to a subset of its fields.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to