This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new c6359bf78da Avoid copy/allocation when read view types from parquet (#5877)
c6359bf78da is described below
commit c6359bf78dab76c16112cfbe22c83af680ba7376
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Thu Jun 13 09:22:03 2024 -0400
Avoid copy/allocation when read view types from parquet (#5877)
* avoid copy/allocation when build from offset buffer
* avoid hard code block id
---
parquet/src/arrow/buffer/offset_buffer.rs | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs
index 32bb9d0862b..181e69c669a 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -156,16 +156,19 @@ impl<I: OffsetSizeTrait> OffsetBuffer<I> {
fn build_generic_byte_view(self) -> GenericByteViewBuilder<BinaryViewType>
{
let mut builder =
GenericByteViewBuilder::<BinaryViewType>::with_capacity(self.len());
- let mut values = self.values;
+ let buffer = self.values.into();
+ let block = builder.append_block(buffer);
for window in self.offsets.windows(2) {
let start = window[0];
let end = window[1];
let len = (end - start).to_usize().unwrap();
- let b = values.drain(..len).collect::<Vec<u8>>();
- if b.is_empty() {
- builder.append_null();
+
+ if len != 0 {
+ builder
+ .try_append_view(block, start.as_usize() as u32, len as
u32)
+ .unwrap();
} else {
- builder.append_value(b);
+ builder.append_null();
}
}
builder