etseidl commented on code in PR #8341:
URL: https://github.com/apache/arrow-rs/pull/8341#discussion_r2345440018


##########
parquet/src/parquet_thrift.rs:
##########
@@ -428,122 +388,176 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> {
         }
 
         match field_type {
-            FieldType::BooleanFalse | FieldType::BooleanTrue => 
self.read_bool().map(|_| ()),
+            // boolean field has no data
+            FieldType::BooleanFalse | FieldType::BooleanTrue => Ok(()),
             FieldType::Byte => self.read_i8().map(|_| ()),
             FieldType::I16 => self.skip_vlq().map(|_| ()),
             FieldType::I32 => self.skip_vlq().map(|_| ()),
             FieldType::I64 => self.skip_vlq().map(|_| ()),
             FieldType::Double => self.skip_bytes(8).map(|_| ()),
             FieldType::Binary => self.skip_binary().map(|_| ()),
             FieldType::Struct => {
-                self.read_struct_begin()?;
+                let mut last_field_id = 0i16;
                 loop {
-                    let field_ident = self.read_field_begin()?;
+                    let field_ident = self.read_field_begin(last_field_id)?;
                     if field_ident.field_type == FieldType::Stop {
                         break;
                     }
                     self.skip_till_depth(field_ident.field_type, depth - 1)?;
+                    last_field_id = field_ident.id;
                 }
-                self.read_struct_end()
+                Ok(())
             }
             FieldType::List => {
                 let list_ident = self.read_list_begin()?;
                 for _ in 0..list_ident.size {
                     let element_type = 
FieldType::try_from(list_ident.element_type)?;
                     self.skip_till_depth(element_type, depth - 1)?;
                 }
-                self.read_list_end()
+                Ok(())
             }
             // no list or map types in parquet format
             u => Err(general_err!(format!("cannot skip field type {:?}", &u))),
         }
     }
 }
 
+/// A high performance Thrift reader that reads from a slice of bytes.
+pub(crate) struct ThriftSliceInputProtocol<'a> {
+    buf: &'a [u8],
+}
+
+impl<'a> ThriftSliceInputProtocol<'a> {
+    /// Create a new `ThriftSliceInputProtocol` using the bytes in `buf`.
+    pub fn new(buf: &'a [u8]) -> Self {
+        Self { buf }
+    }
+
+    /// Re-initialize this reader with a new slice.
+    pub fn reset_buffer(&mut self, buf: &'a [u8]) {
+        self.buf = buf;
+    }
+
+    /// Return the current buffer as a slice.
+    pub fn as_slice(&self) -> &'a [u8] {
+        self.buf
+    }
+}
+
+impl<'b, 'a: 'b> ThriftCompactInputProtocol<'b> for 
ThriftSliceInputProtocol<'a> {
+    #[inline]
+    fn read_byte(&mut self) -> Result<u8> {
+        let ret = *self.buf.first().ok_or_else(eof_error)?;
+        self.buf = &self.buf[1..];
+        Ok(ret)
+    }
+
+    fn read_bytes(&mut self) -> Result<&'b [u8]> {
+        let len = self.read_vlq()? as usize;
+        let ret = self.buf.get(..len).ok_or_else(eof_error)?;
+        self.buf = &self.buf[len..];
+        Ok(ret)
+    }
+
+    #[inline]
+    fn skip_bytes(&mut self, n: usize) -> Result<()> {
+        self.buf.get(..n).ok_or_else(eof_error)?;
+        self.buf = &self.buf[n..];
+        Ok(())
+    }
+
+    fn read_double(&mut self) -> Result<f64> {
+        let slice = self.buf.get(..8).ok_or_else(eof_error)?;
+        self.buf = &self.buf[8..];
+        match slice.try_into() {
+            Ok(slice) => Ok(f64::from_le_bytes(slice)),
+            Err(_) => Err(general_err!("Unexpected error converting slice")),
+        }
+    }
+}
+
 fn eof_error() -> ParquetError {
     eof_err!("Unexpected EOF")
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for bool {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+/// Trait implemented for objects that can be deserialized from a Thrift input 
stream.
+/// Implementations are provided for Thrift primitive types.
+pub(crate) trait ReadThrift<'a, R: ThriftCompactInputProtocol<'a>> {
+    /// Read an object of type `Self` from the input protocol object.
+    fn read_thrift(prot: &mut R) -> Result<Self>
+    where
+        Self: Sized;
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for bool {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         prot.read_bool()
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i8 {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i8 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         prot.read_i8()
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i16 {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i16 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         prot.read_i16()
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i32 {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i32 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         prot.read_i32()
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i64 {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i64 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         prot.read_i64()
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for OrderedF64 {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for OrderedF64 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         Ok(OrderedF64(prot.read_double()?))
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for &'a str {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for &'a str {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         prot.read_string()
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for String {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for String {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         Ok(prot.read_string()?.to_owned())
     }
 }
 
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for &'a [u8] {
-    type Error = ParquetError;
-    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for &'a [u8] {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
         prot.read_bytes()
     }
 }
 
-impl<'a, T> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Vec<T>
+/// Read a Thrift encoded [list] from the input protocol object.
+///
+/// [list]: 
https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
+pub(crate) fn read_thrift_vec<'a, T, R>(prot: &mut R) -> Result<Vec<T>>

Review Comment:
   This is a big reason why this change was needed...trying to get the reading 
of vectors of serializable objects working with the changes to the 
`ThriftCompactInputProtocol` changes was just too hard. Adding this function a) 
worked and b) made implementing the thrift macros easier.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to