scovich commented on code in PR #7704:
URL: https://github.com/apache/arrow-rs/pull/7704#discussion_r2155500145
##########
parquet-variant/src/variant.rs:
##########
@@ -223,113 +193,79 @@ impl<'m> VariantMetadata<'m> {
pub fn dictionary_size(&self) -> usize {
self.dict_size
}
+
+ /// The variant protocol version
pub fn version(&self) -> u8 {
self.header.version
}
- /// Helper method to get the offset start and end range for a key by index.
- fn get_offsets_for_key_by(&self, index: usize) -> Result<Range<usize>, ArrowError> {
- if index >= self.dict_size {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Index {} out of bounds for dictionary of length {}",
- index, self.dict_size
- )));
- }
-
- // Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1)
- let unpack = |i| self.header.offset_size.unpack_usize(self.bytes, 1, i + 1);
- Ok(unpack(index)?..unpack(index + 1)?)
- }
-
- /// Get a single offset by index
- pub fn get_offset_by(&self, index: usize) -> Result<usize, ArrowError> {
- if index >= self.dict_size {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Index {} out of bounds for dictionary of length {}",
- index, self.dict_size
- )));
- }
-
+ /// Gets an offset array entry by index.
+ fn get_offset(&self, i: usize) -> Result<usize, ArrowError> {
// Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1)
- let unpack = |i| self.header.offset_size.unpack_usize(self.bytes, 1, i + 1);
- unpack(index)
+ let bytes = slice_from_slice(self.bytes, ..self.dictionary_key_start_byte)?;
+ self.header.offset_size.unpack_usize(bytes, 1, i + 1)
}
- /// Get the key-name by index
- pub fn get_field_by(&self, index: usize) -> Result<&'m str, ArrowError> {
- let offset_range = self.get_offsets_for_key_by(index)?;
- self.get_field_by_offset(offset_range)
+ /// Gets a dictionary entry by index
+ pub fn get(&self, i: usize) -> Result<&'m str, ArrowError> {
+ let dictionary_keys_bytes = slice_from_slice(self.bytes, self.dictionary_key_start_byte..)?;
+ let byte_range = self.get_offset(i)?..self.get_offset(i + 1)?;
+ string_from_slice(dictionary_keys_bytes, byte_range)
Review Comment:
The checked iterator currently calls this method, so we'd have to split up
the code to distinguish first-touch (checked) vs. later touches (unchecked)?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]