adamreeve commented on code in PR #7111: URL: https://github.com/apache/arrow-rs/pull/7111#discussion_r1999974296
########## parquet/src/file/writer.rs: ########## @@ -699,44 +803,96 @@ impl<'a> SerializedColumnWriter<'a> { /// `SerializedPageWriter` should not be used after calling `close()`. pub struct SerializedPageWriter<'a, W: Write> { sink: &'a mut TrackedWrite<W>, + #[cfg(feature = "encryption")] + page_encryptor: Option<PageEncryptor>, + #[cfg(not(feature = "encryption"))] + page_encryptor: Option<Never>, } impl<'a, W: Write> SerializedPageWriter<'a, W> { /// Creates new page writer. pub fn new(sink: &'a mut TrackedWrite<W>) -> Self { - Self { sink } + Self { + sink, + page_encryptor: None, + } + } + + #[cfg(feature = "encryption")] + /// Set the encryptor to use to encrypt page data + fn with_page_encryptor(mut self, page_encryptor: Option<PageEncryptor>) -> Self { + self.page_encryptor = page_encryptor; + self } /// Serializes page header into Thrift. /// Returns number of bytes that have been written into the sink. #[inline] fn serialize_page_header(&mut self, header: parquet::PageHeader) -> Result<usize> { let start_pos = self.sink.bytes_written(); - { - let mut protocol = TCompactOutputProtocol::new(&mut self.sink); - header.write_to_out_protocol(&mut protocol)?; + match self.page_encryptor.as_ref() { + #[cfg(feature = "encryption")] + Some(page_encryptor) => { + page_encryptor.encrypt_page_header(&header, &mut self.sink)?; + } + _ => { + let mut protocol = TCompactOutputProtocol::new(&mut self.sink); + header.write_to_out_protocol(&mut protocol)?; + } } Ok(self.sink.bytes_written() - start_pos) } } +trait PageModuleWriter { + fn serialize_page(&mut self, page: &CompressedPage) -> Result<Vec<u8>>; +} + +#[cfg(not(feature = "encryption"))] +impl<W: Write + Send> PageModuleWriter for SerializedPageWriter<'_, W> { + fn serialize_page(&mut self, page: &CompressedPage) -> Result<Vec<u8>> { + Ok(page.data().to_vec()) Review Comment: It's not great that we need to copy all the page data here when we didn't before. 
I think if we can do something like https://github.com/apache/arrow-rs/pull/7111#discussion_r1999933781, then this copy wouldn't be needed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org