liamzwbao commented on code in PR #9658:
URL: https://github.com/apache/arrow-rs/pull/9658#discussion_r3093493761
##########
arrow-json/src/reader/run_end_array.rs:
##########
@@ -63,58 +66,33 @@ impl<R: RunEndIndexType + Send> ArrayDecoder for
RunEndEncodedArrayDecoder<R> {
return Ok(new_empty_array(&self.data_type));
}
- let flat_data = self.decoder.decode(tape, pos)?.to_data();
+ let flat_array = self.decoder.decode(tape, pos)?;
- let mut run_ends: Vec<R::Native> = Vec::new();
- let mut mutable = MutableArrayData::new(vec![&flat_data], false, len);
+ let partitions = partition(from_ref(&flat_array))?;
+ let size = partitions.len();
+ let mut run_ends = Vec::with_capacity(size);
+ let mut indices = Vec::with_capacity(size);
- let mut run_start = 0;
- for i in 1..len {
- if !same_run(&flat_data, run_start, i) {
- let run_end = R::Native::from_usize(i).ok_or_else(|| {
- ArrowError::JsonError(format!(
- "Run end value {i} exceeds {:?} range",
- R::DATA_TYPE
- ))
- })?;
- run_ends.push(run_end);
- mutable.extend(0, run_start, run_start + 1);
- run_start = i;
- }
+ for Range { start, end } in partitions.ranges() {
+ let run_end = R::Native::from_usize(end).ok_or_else(|| {
+ ArrowError::JsonError(format!(
+ "Run end value {end} exceeds {:?} range",
+ R::DATA_TYPE
+ ))
+ })?;
+ run_ends.push(run_end);
+ indices.push(start);
}
- let run_end = R::Native::from_usize(len).ok_or_else(|| {
- ArrowError::JsonError(format!(
- "Run end value {len} exceeds {:?} range",
- R::DATA_TYPE
- ))
- })?;
- run_ends.push(run_end);
- mutable.extend(0, run_start, run_start + 1);
- let values_data = mutable.freeze();
- let run_ends_data =
- PrimitiveArray::<R>::new(ScalarBuffer::from(run_ends),
None).into_data();
+ let indices =
UInt32Array::from_iter_values(indices.into_iter().map(|i| i as u32));
Review Comment:
Yes. Also, `indices` are bounded by the tape `pos` (which is `&[u32]`), so, if I
understand correctly, they should never exceed `u32::MAX`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]