tustvold commented on code in PR #4871:
URL: https://github.com/apache/arrow-rs/pull/4871#discussion_r1338855722


##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -347,31 +349,68 @@ impl PageWriter for ArrowPageWriter {
     }
 }
 
-/// Encodes a leaf column to [`ArrowPageWriter`]
-enum ArrowColumnWriter {
+/// A leaf column that can be encoded by [`ArrowColumnWriter`]
+pub struct ArrowLeafColumn(ArrayLevels);
+
+/// Computes the [`ArrowLeafColumn`] for a given potentially nested 
[`ArrayRef`]
+pub fn compute_leaves(field: &Field, array: &ArrayRef) -> 
Result<Vec<ArrowLeafColumn>> {
+    let levels = calculate_array_levels(array, field)?;
+    Ok(levels.into_iter().map(ArrowLeafColumn).collect())
+}
+
+/// Encodes [`ArrowLeafColumn`] to [`ArrowColumnChunk`]
+pub struct ArrowColumnWriter {
+    writer: ArrowColumnWriterImpl,
+    chunk: SharedColumnChunk,
+}
+
+enum ArrowColumnWriterImpl {
     ByteArray(GenericColumnWriter<'static, ByteArrayEncoder>),
     Column(ColumnWriter<'static>),
 }
 
 impl ArrowColumnWriter {
+    /// Write an [`ArrowLeafColumn`]
+    pub fn write(&mut self, col: &ArrowLeafColumn) -> Result<()> {
+        match &mut self.writer {
+            ArrowColumnWriterImpl::Column(c) => {
+                write_leaf(c, &col.0)?;
+            }
+            ArrowColumnWriterImpl::ByteArray(c) => {
+                write_primitive(c, col.0.array().as_ref(), &col.0)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Close this column returning the [`ArrowColumnChunk`] and 
[`ColumnCloseResult`]
+    pub fn close(self) -> Result<(ArrowColumnChunk, ColumnCloseResult)> {
+        let result = match self.writer {
+            ArrowColumnWriterImpl::ByteArray(c) => c.close()?,
+            ArrowColumnWriterImpl::Column(c) => c.close()?,
+        };
+        let chunk = Arc::try_unwrap(self.chunk).ok().unwrap();
+        Ok((chunk.into_inner().unwrap(), result))
+    }
+
     /// Returns the estimated total bytes for this column writer
-    fn get_estimated_total_bytes(&self) -> u64 {
-        match self {
-            ArrowColumnWriter::ByteArray(c) => c.get_estimated_total_bytes(),
-            ArrowColumnWriter::Column(c) => c.get_estimated_total_bytes(),
+    pub fn get_estimated_total_bytes(&self) -> usize {
+        match &self.writer {
+            ArrowColumnWriterImpl::ByteArray(c) => 
c.get_estimated_total_bytes() as _,
+            ArrowColumnWriterImpl::Column(c) => c.get_estimated_total_bytes() 
as _,
         }
     }
 }
 
 /// Encodes [`RecordBatch`] to a parquet row group
-pub struct ArrowRowGroupWriter {
-    writers: Vec<(SharedColumnChunk, ArrowColumnWriter)>,
+struct ArrowRowGroupWriter {

Review Comment:
   This no longer needs to be public. Since it hasn't been released yet, I opted 
to partially revert the change made in #4850.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to