tustvold commented on code in PR #1935:
URL: https://github.com/apache/arrow-rs/pull/1935#discussion_r909551523
##########
parquet/src/file/writer.rs:
##########
@@ -177,10 +182,66 @@ impl<W: Write> SerializedFileWriter<W> {
Ok(())
}
+ /// Serialize all the offset index to the file
+ fn write_offset_indexes(&mut self) -> Result<()> {
+ // iter row group
+ // iter each column
+ // write offset index to the file
+ for row_group in &mut self.row_groups {
+ for column_metdata in row_group.columns_mut() {
+ match column_metdata.offset_index() {
+ Some(offset_index) => {
+ let start_offset = self.buf.bytes_written();
+ let mut protocol = TCompactOutputProtocol::new(&mut
self.buf);
+ offset_index.write_to_out_protocol(&mut protocol)?;
+ protocol.flush()?;
+ let end_offset = self.buf.bytes_written();
+ // set offset and index for offset index
+ column_metdata.set_offset_index_offset(start_offset as
i64);
+ column_metdata
+ .set_offset_index_length((end_offset -
start_offset) as i32);
Review Comment:
If possible, I would prefer that we avoid making this a breaking API change,
which this PR currently is.
##########
parquet/src/file/writer.rs:
##########
@@ -177,10 +182,66 @@ impl<W: Write> SerializedFileWriter<W> {
Ok(())
}
+ /// Serialize all the offset index to the file
+ fn write_offset_indexes(&mut self) -> Result<()> {
+ // iter row group
+ // iter each column
+ // write offset index to the file
+ for row_group in &mut self.row_groups {
+ for column_metdata in row_group.columns_mut() {
+ match column_metdata.offset_index() {
+ Some(offset_index) => {
+ let start_offset = self.buf.bytes_written();
+ let mut protocol = TCompactOutputProtocol::new(&mut
self.buf);
+ offset_index.write_to_out_protocol(&mut protocol)?;
+ protocol.flush()?;
+ let end_offset = self.buf.bytes_written();
+ // set offset and index for offset index
+ column_metdata.set_offset_index_offset(start_offset as
i64);
+ column_metdata
+ .set_offset_index_length((end_offset -
start_offset) as i32);
Review Comment:
If possible, I would prefer that we avoid making this a breaking API change,
which this PR currently is.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]