necro351 opened a new issue, #14865:
URL: https://github.com/apache/arrow/issues/14865

   ### Describe the bug, including details regarding any error messages, version, and platform.
   
   I am benchmarking different Parquet libraries by writing an Arrow buffer to 
a file. At the end of its run, my benchmark checks that the allocator is empty. 
I found that pqarrow retains a memory.Buffer if maybeParentNulls is true, and 
never releases it. I looked through the API for a Release() function or some 
other functionality that would release this buffer, but did not find one.
   
   I also noticed that the unit tests do not check that the allocator is zeroed out.
   
   This is the if-statement I am concerned about:
   ```
   // WriteArrowToColumn writes apache arrow columnar data directly to a 
ColumnWriter.
   // Returns non-nil error if the array data type is not compatible with the 
concrete
   // writer type.
   // 
   // leafArr is always a primitive (possibly dictionary encoded type).
   // Leaf_field_nullable indicates whether the leaf array is considered 
nullable
   // according to its schema in a Table or its parent array.
   func WriteArrowToColumn(ctx context.Context, cw file.ColumnChunkWriter, 
leafArr arrow.Array, defLevels, repLevels []int16, leafFieldNullable bool) 
error {
      // Leaf nulls are canonical when there is only a single null element 
after a list
      // and it is at the leaf.
      colLevelInfo := cw.LevelInfo()
      singleNullable := (colLevelInfo.DefLevel == 
colLevelInfo.RepeatedAncestorDefLevel+1) && leafFieldNullable
      maybeParentNulls := colLevelInfo.HasNullableValues() && !singleNullable
   
      if maybeParentNulls {
         buf := memory.NewResizableBuffer(cw.Properties().Allocator())
   ---NON-RELEASED ALLOC HERE--->      
buf.Resize(int(bitutil.BytesForBits(cw.Properties().WriteBatchSize())))
         cw.SetBitsBuffer(buf)
      }
   ...
   ```
   
   This is the stack trace of the suspicious allocation (printed by a 
PrintStack call I added to my own custom debug allocator):
   ```
   goroutine 19 [running]:
   runtime/debug.Stack()
           /usr/local/go/src/runtime/debug/stack.go:24 +0x65
   runtime/debug.PrintStack()
           /usr/local/go/src/runtime/debug/stack.go:16 +0x19
   gitlab.eng.vmware.com/taurus/data-mesh.git/compact-lake/rows.(*VerboseAllocator).Allocate(0xc0002d5da0, 0x80)
           /home/rick/data-mesh/compact-lake/rows/buffer_test.go:145 +0x6a
   github.com/apache/arrow/go/v11/arrow/memory.(*Buffer).Reserve(0xc00011ee10, 0xc0001596b0?)
           /home/rick/go/pkg/mod/github.com/apache/arrow/go/[email protected]/arrow/memory/buffer.go:110 +0x5b
   github.com/apache/arrow/go/v11/arrow/memory.(*Buffer).resize(0xc00011ee10, 0x80, 0xf0?)
           /home/rick/go/pkg/mod/github.com/apache/arrow/go/[email protected]/arrow/memory/buffer.go:130 +0xf4
   github.com/apache/arrow/go/v11/arrow/memory.(*Buffer).Resize(...)
           /home/rick/go/pkg/mod/github.com/apache/arrow/go/[email protected]/arrow/memory/buffer.go:119
   github.com/apache/arrow/go/v11/parquet/pqarrow.WriteArrowToColumn({0xddcd60, 0xc000130008}, {0xde0c40, 0xc00029e180}, {0xde2040?, 0xc0001736c0?}, {0xc0002d8500, 0x1, 0x80}, {0x0, ...}, ...)
           /home/rick/go/pkg/mod/github.com/apache/arrow/go/[email protected]/parquet/pqarrow/encode_arrow.go:219 +0x159
   github.com/apache/arrow/go/v11/parquet/pqarrow.(*ArrowColumnWriter).Write(0xc000159b60, {0xddcd60, 0xc000130008})
           /home/rick/go/pkg/mod/github.com/apache/arrow/go/[email protected]/parquet/pqarrow/encode_arrow.go:195 +0x533
   gitlab.eng.vmware.com/taurus/data-mesh.git/compact-lake/rows.(*Buffer).writeColumns(0xcbd4d5?, {0xddcd60, 0xc000130008}, 0xc000066c90?, {0xc0003382d0, 0x1, 0x0?}, 0x8?)
           /home/rick/data-mesh/compact-lake/rows/buffer.go:352 +0x3fa
   gitlab.eng.vmware.com/taurus/data-mesh.git/compact-lake/rows.(*Buffer).WriteParquet(0xc000159eb8, {0xddcd60, 0xc000130008}, {0xddbaf0?, 0xc0002d5da0?}, {0xdd80a0, 0xc00011a2a0})
           /home/rick/data-mesh/compact-lake/rows/buffer.go:323 +0x8dd
   gitlab.eng.vmware.com/taurus/data-mesh.git/compact-lake/rows.TestWriteOneRowParquet(0xc000105d40)
           /home/rick/data-mesh/compact-lake/rows/buffer_test.go:78 +0x671
   testing.tRunner(0xc000105d40, 0xcfc730)
           /usr/local/go/src/testing/testing.go:1439 +0x102
   created by testing.(*T).Run
           /usr/local/go/src/testing/testing.go:1486 +0x35f
   ```
   
   ### Component(s)
   
   Go


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to