This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch temp-parquet-pqarrow in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 59e421893a644ee952ed53edb16d258d3c9a8ebc Author: Matthew Topol <[email protected]> AuthorDate: Tue Nov 9 13:57:31 2021 -0500 fix memory leak and string writing --- go/parquet/pqarrow/encode_arrow.go | 4 +++- go/parquet/pqarrow/encode_arrow_test.go | 30 +++++++++++++++--------------- go/parquet/pqarrow/file_writer.go | 4 +++- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/go/parquet/pqarrow/encode_arrow.go b/go/parquet/pqarrow/encode_arrow.go index 5d11718..8114f66 100644 --- a/go/parquet/pqarrow/encode_arrow.go +++ b/go/parquet/pqarrow/encode_arrow.go @@ -225,6 +225,7 @@ func WriteArrowToColumn(ctx context.Context, cw file.ColumnChunkWriter, leafArr type binaryarr interface { ValueBytes() []byte ValueOffsets() []int32 + ValueOffset(i int) int } func writeDenseArrow(ctx *arrowWriteContext, cw file.ColumnChunkWriter, leafArr array.Interface, defLevels, repLevels []int16, maybeParentNulls bool) (err error) { @@ -419,9 +420,10 @@ func writeDenseArrow(ctx *arrowWriteContext, cw file.ColumnChunkWriter, leafArr offsets = leafArr.(binaryarr).ValueOffsets() ) + firstOffset := leafArr.(binaryarr).ValueOffset(0) data := make([]parquet.ByteArray, leafArr.Len()) for i := range data { - data[i] = parquet.ByteArray(buffer[offsets[i]:offsets[i+1]]) + data[i] = parquet.ByteArray(buffer[offsets[i]-int32(firstOffset) : offsets[i+1]-int32(firstOffset)]) } if !maybeParentNulls && noNulls { wr.WriteBatch(data, defLevels, repLevels) diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 84d8f22..40daa3f 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -820,21 +820,21 @@ func (ps *ParquetIOTestSuite) readAndCheckSingleColumnFile(data []byte, values a } var fullTypeList = []arrow.DataType{ - // arrow.FixedWidthTypes.Boolean, - // arrow.PrimitiveTypes.Uint8, - // arrow.PrimitiveTypes.Int8, - // arrow.PrimitiveTypes.Uint16, - // arrow.PrimitiveTypes.Int16, - // arrow.PrimitiveTypes.Uint32, - // arrow.PrimitiveTypes.Int32, - // arrow.PrimitiveTypes.Uint64, - // arrow.PrimitiveTypes.Int64, - // arrow.FixedWidthTypes.Date32, - // arrow.PrimitiveTypes.Float32, - // arrow.PrimitiveTypes.Float64, - // arrow.BinaryTypes.String, - // arrow.BinaryTypes.Binary, - // &arrow.FixedSizeBinaryType{ByteWidth: 10}, + arrow.FixedWidthTypes.Boolean, + arrow.PrimitiveTypes.Uint8, + arrow.PrimitiveTypes.Int8, + arrow.PrimitiveTypes.Uint16, + arrow.PrimitiveTypes.Int16, + arrow.PrimitiveTypes.Uint32, + arrow.PrimitiveTypes.Int32, + arrow.PrimitiveTypes.Uint64, + arrow.PrimitiveTypes.Int64, + arrow.FixedWidthTypes.Date32, + arrow.PrimitiveTypes.Float32, + arrow.PrimitiveTypes.Float64, + arrow.BinaryTypes.String, + arrow.BinaryTypes.Binary, + &arrow.FixedSizeBinaryType{ByteWidth: 10}, &arrow.Decimal128Type{Precision: 1, Scale: 0}, &arrow.Decimal128Type{Precision: 5, Scale: 4}, &arrow.Decimal128Type{Precision: 10, Scale: 9}, diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go index 5109602..d61b512 100644 --- a/go/parquet/pqarrow/file_writer.go +++ b/go/parquet/pqarrow/file_writer.go @@ -287,5 +287,7 @@ func (fw *FileWriter) WriteColumnChunked(data *array.Chunked, offset, size int64 // it is based on the current column of the row group writer allowing progressive building // of the file by columns without needing a full record or table to write. func (fw *FileWriter) WriteColumnData(data array.Interface) error { - return fw.WriteColumnChunked(array.NewChunked(data.DataType(), []array.Interface{data}), 0, int64(data.Len())) + chnked := array.NewChunked(data.DataType(), []array.Interface{data}) + defer chnked.Release() + return fw.WriteColumnChunked(chnked, 0, int64(data.Len())) }
