zeroshade commented on code in PR #548:
URL: https://github.com/apache/arrow-go/pull/548#discussion_r2466075847
##########
parquet/pqarrow/file_writer_test.go:
##########
@@ -133,3 +133,78 @@ func TestFileWriterBuffered(t *testing.T) {
require.NoError(t, writer.Close())
assert.Equal(t, 4, writer.NumRows())
}
+
+func TestFileWriterTotalBytes(t *testing.T) {
+ schema := arrow.NewSchema([]arrow.Field{
+ {Name: "one", Nullable: true, Type:
arrow.PrimitiveTypes.Float64},
+ {Name: "two", Nullable: true, Type:
arrow.PrimitiveTypes.Float64},
+ }, nil)
+
+ data := `[
+ {"one": 1, "two": 2},
+ {"one": 3, "two": 4}
+ ]`
+ record1, _, err := array.RecordFromJSON(memory.DefaultAllocator,
schema, strings.NewReader(data))
+ require.NoError(t, err)
+ defer record1.Release()
+
+ data2 := `[
+ {"one": 5, "two": 6},
+ {"one": 7, "two": 8}
+ ]`
+ record2, _, err := array.RecordFromJSON(memory.DefaultAllocator,
schema, strings.NewReader(data2))
+ require.NoError(t, err)
+ defer record2.Release()
+
+ output := &bytes.Buffer{}
+ writerProps :=
parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(2))
+ writer, err := pqarrow.NewFileWriter(schema, output, writerProps,
pqarrow.DefaultWriterProps())
+ require.NoError(t, err)
+
+ // Write first record
+ require.NoError(t, writer.Write(record1))
+
+ // Write second record, which creates a new row group
+ require.NoError(t, writer.Write(record2))
+
+ // Close the writer and verify final bytes
+ require.NoError(t, writer.Close())
+
+ // Verify total bytes & compressed bytes are greater than 0
+ assert.Greater(t, writer.TotalCompressedBytes(), int64(0))
+ assert.Greater(t, writer.TotalBytesWritten(), int64(0))
Review Comment:
Since we're creating the test and know the settings, we can probably work out
the exact sizes and have the test assert on them explicitly, so that we know
if we break anything in the future. Make sense?
##########
parquet/pqarrow/file_writer.go:
##########
@@ -111,11 +115,27 @@ func (fw *FileWriter) NewRowGroup() {
func (fw *FileWriter) NewBufferedRowGroup() {
if fw.rgw != nil {
fw.rgw.Close()
+ fw.totalCompressedBytes += fw.rgw.TotalCompressedBytes()
+ fw.totalBytesWritten += fw.rgw.TotalBytesWritten()
Review Comment:
In `FileWriter.Close`, when we write the metadata, we should probably add the
size of the metadata we write to the total bytes written, yeah?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]