Jefffrey commented on code in PR #10237:
URL: https://github.com/apache/arrow-rs/pull/10237#discussion_r3505985310
##########
arrow-avro/src/reader/block.rs:
##########
@@ -96,7 +96,13 @@ impl BlockDecoder {
AvroError::ParseError(format!("Block size cannot
be negative, got {c}"))
})?;
- self.in_progress.data.reserve(self.bytes_remaining);
+ // Only reserve what the current input backs: the
block size is
+ // attacker-controlled here, so reserving it outright
lets a crafted
+ // `i64::MAX` size abort the process (#10234). The
rest is reserved
+ // lazily by `extend_from_slice` below as data arrives.
Review Comment:
```suggestion
// Only reserve what the current input backs: the
block size is
// input specified so could be an extreme value
(e.g. i64::MAX)
// in case of corrupted/malicious input. The rest is
reserved
// lazily by `extend_from_slice` below as data
arrives.
```
just to soften the language; calling it `attacker-controlled` is
unnecessarily defensive
##########
arrow-avro/src/reader/record.rs:
##########
@@ -2286,35 +2286,57 @@ fn process_blockwise(
match block_count.cmp(&0) {
Ordering::Equal => break,
Ordering::Less => {
- let count = (-block_count) as usize;
+ // `unsigned_abs` avoids overflowing `-block_count` for
`i64::MIN` (#10235)
+ let count = block_count.unsigned_abs() as usize;
// A negative count is followed by a long of the size in bytes
- let size_in_bytes = buf.get_long()? as usize;
+ let raw_size = buf.get_long()?;
+ let size_in_bytes = usize::try_from(raw_size).map_err(|_| {
+ AvroError::ParseError(format!("Block size cannot be
negative, got {raw_size}"))
+ })?;
match negative_behavior {
NegativeBlockBehavior::ProcessItems => {
// Process items one-by-one after reading size
- for _ in 0..count {
- on_item(buf)?;
- }
+ process_block_items(buf, count, &mut on_item)?;
}
NegativeBlockBehavior::SkipBySize => {
// Skip the entire block payload at once
let _ = buf.get_fixed(size_in_bytes)?;
}
}
- total += count;
+ total = total.saturating_add(count);
}
Ordering::Greater => {
let count = block_count as usize;
- for _ in 0..count {
- on_item(buf)?;
- }
- total += count;
+ process_block_items(buf, count, &mut on_item)?;
+ total = total.saturating_add(count);
}
}
}
Ok(total)
}
+/// Decode `count` block items, rejecting a `count` larger than the bytes left
to
+/// decode them from. A crafted block can advertise up to `i64::MAX` items
which,
+/// with a zero-byte decoder like `null`, would otherwise spin forever
(#10235).
+/// Items that read input each need at least one byte, so a real `count`
always fits.
+#[inline]
+fn process_block_items(
+ buf: &mut AvroCursor,
+ count: usize,
+ on_item: &mut impl FnMut(&mut AvroCursor) -> Result<(), AvroError>,
+) -> Result<(), AvroError> {
+ let remaining = buf.remaining();
+ if count > remaining {
+ return Err(AvroError::ParseError(format!(
+ "Block declares {count} items but only {remaining} bytes remain"
+ )));
+ }
Review Comment:
Would this in essence prevent us from decoding `array<null>` entirely? How
do other avro implementations handle edge cases like this? Since technically it
is valid input 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]