This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch temp-parquet-pqarrow
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 59e421893a644ee952ed53edb16d258d3c9a8ebc
Author: Matthew Topol <[email protected]>
AuthorDate: Tue Nov 9 13:57:31 2021 -0500

    fix memory leak and string writing
---
 go/parquet/pqarrow/encode_arrow.go      |  4 +++-
 go/parquet/pqarrow/encode_arrow_test.go | 30 +++++++++++++++---------------
 go/parquet/pqarrow/file_writer.go       |  4 +++-
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/go/parquet/pqarrow/encode_arrow.go b/go/parquet/pqarrow/encode_arrow.go
index 5d11718..8114f66 100644
--- a/go/parquet/pqarrow/encode_arrow.go
+++ b/go/parquet/pqarrow/encode_arrow.go
@@ -225,6 +225,7 @@ func WriteArrowToColumn(ctx context.Context, cw file.ColumnChunkWriter, leafArr
 type binaryarr interface {
        ValueBytes() []byte
        ValueOffsets() []int32
+       ValueOffset(i int) int
 }
 
 func writeDenseArrow(ctx *arrowWriteContext, cw file.ColumnChunkWriter, leafArr array.Interface, defLevels, repLevels []int16, maybeParentNulls bool) (err error) {
@@ -419,9 +420,10 @@ func writeDenseArrow(ctx *arrowWriteContext, cw file.ColumnChunkWriter, leafArr
                        offsets = leafArr.(binaryarr).ValueOffsets()
                )
 
+               firstOffset := leafArr.(binaryarr).ValueOffset(0)
                data := make([]parquet.ByteArray, leafArr.Len())
                for i := range data {
-                       data[i] = parquet.ByteArray(buffer[offsets[i]:offsets[i+1]])
+                       data[i] = parquet.ByteArray(buffer[offsets[i]-int32(firstOffset) : offsets[i+1]-int32(firstOffset)])
                }
                if !maybeParentNulls && noNulls {
                        wr.WriteBatch(data, defLevels, repLevels)
diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go
index 84d8f22..40daa3f 100644
--- a/go/parquet/pqarrow/encode_arrow_test.go
+++ b/go/parquet/pqarrow/encode_arrow_test.go
@@ -820,21 +820,21 @@ func (ps *ParquetIOTestSuite) readAndCheckSingleColumnFile(data []byte, values a
 }
 
 var fullTypeList = []arrow.DataType{
-       // arrow.FixedWidthTypes.Boolean,
-       // arrow.PrimitiveTypes.Uint8,
-       // arrow.PrimitiveTypes.Int8,
-       // arrow.PrimitiveTypes.Uint16,
-       // arrow.PrimitiveTypes.Int16,
-       // arrow.PrimitiveTypes.Uint32,
-       // arrow.PrimitiveTypes.Int32,
-       // arrow.PrimitiveTypes.Uint64,
-       // arrow.PrimitiveTypes.Int64,
-       // arrow.FixedWidthTypes.Date32,
-       // arrow.PrimitiveTypes.Float32,
-       // arrow.PrimitiveTypes.Float64,
-       // arrow.BinaryTypes.String,
-       // arrow.BinaryTypes.Binary,
-       // &arrow.FixedSizeBinaryType{ByteWidth: 10},
+       arrow.FixedWidthTypes.Boolean,
+       arrow.PrimitiveTypes.Uint8,
+       arrow.PrimitiveTypes.Int8,
+       arrow.PrimitiveTypes.Uint16,
+       arrow.PrimitiveTypes.Int16,
+       arrow.PrimitiveTypes.Uint32,
+       arrow.PrimitiveTypes.Int32,
+       arrow.PrimitiveTypes.Uint64,
+       arrow.PrimitiveTypes.Int64,
+       arrow.FixedWidthTypes.Date32,
+       arrow.PrimitiveTypes.Float32,
+       arrow.PrimitiveTypes.Float64,
+       arrow.BinaryTypes.String,
+       arrow.BinaryTypes.Binary,
+       &arrow.FixedSizeBinaryType{ByteWidth: 10},
        &arrow.Decimal128Type{Precision: 1, Scale: 0},
        &arrow.Decimal128Type{Precision: 5, Scale: 4},
        &arrow.Decimal128Type{Precision: 10, Scale: 9},
diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go
index 5109602..d61b512 100644
--- a/go/parquet/pqarrow/file_writer.go
+++ b/go/parquet/pqarrow/file_writer.go
@@ -287,5 +287,7 @@ func (fw *FileWriter) WriteColumnChunked(data *array.Chunked, offset, size int64
 // it is based on the current column of the row group writer allowing progressive building
 // of the file by columns without needing a full record or table to write.
 func (fw *FileWriter) WriteColumnData(data array.Interface) error {
-       return fw.WriteColumnChunked(array.NewChunked(data.DataType(), []array.Interface{data}), 0, int64(data.Len()))
+       chnked := array.NewChunked(data.DataType(), []array.Interface{data})
+       defer chnked.Release()
+       return fw.WriteColumnChunked(chnked, 0, int64(data.Len()))
 }

Reply via email to