This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new d5f0c34e fix(parquet/pqarrow): return an error on pqarrow write calls 
if the writer is already closed (#728)
d5f0c34e is described below

commit d5f0c34e6e97044703be3523f29efedfd0207099
Author: Alex Normand <[email protected]>
AuthorDate: Thu Mar 26 08:21:12 2026 -0700

    fix(parquet/pqarrow): return an error on pqarrow write calls if the writer 
is already closed (#728)
    
    ### Rationale for this change
    Currently, the pqarrow FileWriter ignores the `closed` status of the
    FileWriter and write calls are attempted which can lead to surprising
    and hard to understand errors as noted in issue #727. Returning a clear
    error stating the `FileWriter` is already closed should help nudge users
    about their misuse of the API (or provide an indicator of a potential
    race condition between invocations of Write calls and Close).
    
    ### What changes are included in this PR?
    Adds a check on all `FileWriter` write methods to validate whether a
    `FileWriter` is already closed, returning a clear error and short-circuiting
    the write execution.
    
    ### Are these changes tested?
    Yes, a unit test was added to validate the behavior.
    
    ### Are there any user-facing changes?
    
    resolves #727
---
 parquet/pqarrow/file_writer.go      | 15 +++++++++++++++
 parquet/pqarrow/file_writer_test.go | 19 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/parquet/pqarrow/file_writer.go b/parquet/pqarrow/file_writer.go
index e53036a8..6c305c59 100644
--- a/parquet/pqarrow/file_writer.go
+++ b/parquet/pqarrow/file_writer.go
@@ -187,6 +187,9 @@ func (fw *FileWriter) NumRows() int {
 // More memory is utilized compared to Write as the whole row group data is 
kept in memory before it's written
 // since Parquet files must have an entire column written before writing the 
next column.
 func (fw *FileWriter) WriteBuffered(rec arrow.RecordBatch) error {
+       if fw.closed {
+               return fmt.Errorf("WriteBuffered called on already closed 
FileWriter")
+       }
        if !rec.Schema().Equal(fw.schema) {
                return fmt.Errorf("record schema does not match writer's. 
\nrecord: %s\nwriter: %s", rec.Schema(), fw.schema)
        }
@@ -241,6 +244,9 @@ func (fw *FileWriter) WriteBuffered(rec arrow.RecordBatch) 
error {
 // * a highly-restricted memory environment
 // * very large records with lots of rows (potentially close to the max row 
group length)
 func (fw *FileWriter) Write(rec arrow.RecordBatch) error {
+       if fw.closed {
+               return fmt.Errorf("invalid write call: FileWriter is already 
closed")
+       }
        if !rec.Schema().Equal(fw.schema) {
                return fmt.Errorf("record schema does not match writer's. 
\nrecord: %s\nwriter: %s", rec.Schema(), fw.schema)
        }
@@ -276,6 +282,9 @@ func (fw *FileWriter) Write(rec arrow.RecordBatch) error {
 // row group for each chunk of chunkSize rows in the table. Calling this with 
0 rows will
 // still write a 0 length Row Group to the file.
 func (fw *FileWriter) WriteTable(tbl arrow.Table, chunkSize int64) error {
+       if fw.closed {
+               return fmt.Errorf("invalid write call: FileWriter is already 
closed")
+       }
        if chunkSize <= 0 && tbl.NumRows() > 0 {
                return xerrors.New("chunk size per row group must be greater 
than 0")
        } else if !tbl.Schema().Equal(fw.schema) {
@@ -344,6 +353,9 @@ func (fw *FileWriter) Close() error {
 // building of writing columns to a file via arrow data without needing to 
already have
 // a record or table.
 func (fw *FileWriter) WriteColumnChunked(data *arrow.Chunked, offset, size 
int64) error {
+       if fw.closed {
+               return fmt.Errorf("invalid write call: FileWriter is already 
closed")
+       }
        acw, err := newArrowColumnWriter(data, offset, size, fw.manifest, 
fw.rgw, fw.colIdx)
        if err != nil {
                return err
@@ -356,6 +368,9 @@ func (fw *FileWriter) WriteColumnChunked(data 
*arrow.Chunked, offset, size int64
 // it is based on the current column of the row group writer allowing 
progressive building
 // of the file by columns without needing a full record or table to write.
 func (fw *FileWriter) WriteColumnData(data arrow.Array) error {
+       if fw.closed {
+               return fmt.Errorf("invalid write call: FileWriter is already 
closed")
+       }
        chunked := arrow.NewChunked(data.DataType(), []arrow.Array{data})
        defer chunked.Release()
        return fw.WriteColumnChunked(chunked, 0, int64(data.Len()))
diff --git a/parquet/pqarrow/file_writer_test.go 
b/parquet/pqarrow/file_writer_test.go
index 32713cd2..0c771931 100644
--- a/parquet/pqarrow/file_writer_test.go
+++ b/parquet/pqarrow/file_writer_test.go
@@ -208,3 +208,22 @@ func TestFileWriterTotalBytesBuffered(t *testing.T) {
        assert.Equal(t, int64(596), writer.TotalCompressedBytes())
        assert.Equal(t, int64(1306), writer.TotalBytesWritten())
 }
+
+func TestWriteOnClosedFileWriter(t *testing.T) {
+       schema := arrow.NewSchema([]arrow.Field{
+               {Name: "one", Nullable: true, Type: 
arrow.PrimitiveTypes.Float64},
+       }, nil)
+
+       output := &bytes.Buffer{}
+       writer, err := pqarrow.NewFileWriter(schema, output, 
parquet.NewWriterProperties(), pqarrow.DefaultWriterProps())
+       require.NoError(t, err)
+
+       // Close the writer
+       require.NoError(t, writer.Close())
+
+       // Call each write method and ensure they all return an error stating 
the writer is already closed
+       require.ErrorContains(t, writer.WriteBuffered(nil), "already closed")
+       require.ErrorContains(t, writer.Write(nil), "already closed")
+       require.ErrorContains(t, writer.WriteColumnChunked(nil, 0, 0), "already 
closed")
+       require.ErrorContains(t, writer.WriteColumnData(nil), "already closed")
+}

Reply via email to