tustvold commented on code in PR #6738:
URL: https://github.com/apache/arrow-rs/pull/6738#discussion_r1844933946
##########
parquet/src/file/metadata/mod.rs:
##########
@@ -959,17 +959,18 @@ impl ColumnChunkMetaData {
}
/// Returns the offset and length in bytes of the column chunk within the
file
- pub fn byte_range(&self) -> (u64, u64) {
+ pub fn byte_range(&self) -> Result<(u64, u64)> {
Review Comment:
This is a breaking API change
##########
parquet/src/thrift.rs:
##########
@@ -96,13 +106,22 @@ impl<'a> TCompactSliceInputProtocol<'a> {
}
}
+macro_rules! thrift_unimplemented {
+ () => {
+ Err(thrift::Error::Protocol(thrift::ProtocolError {
+ kind: thrift::ProtocolErrorKind::NotImplemented,
+ message: "not implemented".to_string(),
+ }))
+ };
+}
+
impl TInputProtocol for TCompactSliceInputProtocol<'_> {
fn read_message_begin(&mut self) -> thrift::Result<TMessageIdentifier> {
- unimplemented!()
+ thrift_unimplemented!()
Review Comment:
This should be unreachable, a panic is the correct thing to do here
##########
parquet/src/thrift.rs:
##########
@@ -67,7 +67,17 @@ impl<'a> TCompactSliceInputProtocol<'a> {
let mut shift = 0;
loop {
let byte = self.read_byte()?;
- in_progress |= ((byte & 0x7F) as u64) << shift;
+ let val = (byte & 0x7F) as u64;
+ let val = val.checked_shl(shift).map_or_else(
Review Comment:
This is a very performance critical code path, this probably should use
wrapping_shl
##########
parquet/src/format.rs:
##########
@@ -1738,6 +1738,12 @@ impl crate::thrift::TSerializable for IntType {
bit_width: f_1.expect("auto-generated code should have checked for
presence of required fields"),
is_signed: f_2.expect("auto-generated code should have checked for
presence of required fields"),
};
+ if ret.bit_width != 8 && ret.bit_width != 16 && ret.bit_width != 32 &&
ret.bit_width != 64 {
Review Comment:
This is an auto generated file, in place edits need to be scripted
##########
parquet/src/schema/types.rs:
##########
@@ -1227,6 +1231,10 @@ fn from_thrift_helper(elements: &[SchemaElement], index:
usize) -> Result<(usize
if !is_root_node {
builder = builder.with_repetition(rep);
}
+ } else if !is_root_node {
Review Comment:
Do we need this check?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]