[ 
https://issues.apache.org/jira/browse/ARROW-15946?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

David Li reassigned ARROW-15946:
--------------------------------

    Assignee: Min-Young Wu

> [Go] Memory leak in pqarrow.NewColumnWriter with nested structures
> ------------------------------------------------------------------
>
>                 Key: ARROW-15946
>                 URL: https://issues.apache.org/jira/browse/ARROW-15946
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Go, Parquet
>            Reporter: Min-Young Wu
>            Assignee: Min-Young Wu
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 8.0.0
>
>          Time Spent: 2h 20m
>  Remaining Estimate: 0h
>
> There seems to be a memory leak (well, using the default allocator, it would 
> just be an accounting error?) when writing nested structures using 
> pqarrow.FileWriter.
> Repro:
> {code:go}
> package main
> import (
>       "bytes"
>       "fmt"
>       "github.com/apache/arrow/go/v7/arrow"
>       "github.com/apache/arrow/go/v7/arrow/array"
>       "github.com/apache/arrow/go/v7/arrow/memory"
>       "github.com/apache/arrow/go/v7/parquet"
>       "github.com/apache/arrow/go/v7/parquet/compress"
>       "github.com/apache/arrow/go/v7/parquet/pqarrow"
> )
> func main() {
>       allocator := memory.NewCheckedAllocator(memory.DefaultAllocator)
>       sc := arrow.NewSchema([]arrow.Field{
>               {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: 
> true},
>               {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
>               {Name: "struct_i64_f64", Type: arrow.StructOf(
>                       arrow.Field{Name: "i64", Type: 
> arrow.PrimitiveTypes.Int64, Nullable: true},
>                       arrow.Field{Name: "f64", Type: 
> arrow.PrimitiveTypes.Float64, Nullable: true})},
>       }, nil)
>       bld := array.NewRecordBuilder(allocator, sc)
>       bld.Field(0).(*array.Float32Builder).Append(1.0)
>       bld.Field(1).(*array.Int32Builder).Append(1)
>       sbld := bld.Field(2).(*array.StructBuilder)
>       sbld.Append(true)
>       sbld.FieldBuilder(0).(*array.Int64Builder).Append(1)
>       sbld.FieldBuilder(1).(*array.Float64Builder).Append(1.0)
>       rec := bld.NewRecord()
>       bld.Release()
>       var buf bytes.Buffer
>       wr, err := pqarrow.NewFileWriter(sc, &buf,
>               
> parquet.NewWriterProperties(parquet.WithCompression(compress.Codecs.Snappy)),
>               
> pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(allocator)))
>       if err != nil {
>               panic(err)
>       }
>       err = wr.Write(rec)
>       if err != nil {
>               panic(err)
>       }
>       rec.Release()
>       wr.Close()
>       if allocator.CurrentAlloc() != 0 {
>               fmt.Printf("remaining allocation size: %d\n", 
> allocator.CurrentAlloc())
>       }
> }
> {code}



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to