This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1ff43ab5ee GH-38516: [Go][Parquet] Increment the number of rows written when appending a new row group (#38517)
1ff43ab5ee is described below

commit 1ff43ab5ee13de5c3130acf10c7aa8eb9680baab
Author: Tim Schaub <[email protected]>
AuthorDate: Mon Nov 13 10:01:01 2023 -0700

    GH-38516: [Go][Parquet] Increment the number of rows written when appending a new row group (#38517)
    
    ### Rationale for this change
    
    This makes it so the `NumRows` method on the `file.Writer` reports the total number of rows written across multiple row groups.
    
    ### Are these changes tested?
    
    A regression test is added that asserts that the total number of rows written matches expectations.
    
    * Closes: #38516
    
    Authored-by: Tim Schaub <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 go/parquet/file/file_writer.go      | 1 +
 go/parquet/file/file_writer_test.go | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go
index 48eb8f44d1..1d7f7840da 100644
--- a/go/parquet/file/file_writer.go
+++ b/go/parquet/file/file_writer.go
@@ -121,6 +121,7 @@ func (fw *Writer) AppendRowGroup() SerialRowGroupWriter {
 
 func (fw *Writer) appendRowGroup(buffered bool) *rowGroupWriter {
        if fw.rowGroupWriter != nil {
+               fw.nrows += fw.rowGroupWriter.nrows
                fw.rowGroupWriter.Close()
        }
        fw.rowGroups++
diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go
index beceff9732..f32e403a8d 100644
--- a/go/parquet/file/file_writer_test.go
+++ b/go/parquet/file/file_writer_test.go
@@ -97,6 +97,8 @@ func (t *SerializeTestSuite) fileSerializeTest(codec compress.Compression, expec
        writer.Close()
 
        nrows := t.numRowGroups * t.rowsPerRG
+       t.EqualValues(nrows, writer.NumRows())
+
        reader, err := file.NewParquetReader(bytes.NewReader(sink.Bytes()))
        t.NoError(err)
        t.Equal(t.numCols, reader.MetaData().Schema.NumColumns())

Reply via email to