[GitHub] [arrow] zeroshade commented on a change in pull request #10379: ARROW-12851: [Go][Parquet] Add Golang Parquet encoding package

GitBox Fri, 18 Jun 2021 14:07:49 -0700


zeroshade commented on a change in pull request #10379:
URL: https://github.com/apache/arrow/pull/10379#discussion_r653979156




##########
File path: go/parquet/internal/encoding/decoder.go
##########
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+       "bytes"
+       "reflect"
+
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/debug"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/utils"
+       "github.com/apache/arrow/go/parquet/schema"
+)
+
+// DecoderTraits provides an interface for more easily interacting with types
+// to generate decoders for specific types.
+type DecoderTraits interface {
+       Decoder(parquet.Encoding, *schema.Column, bool, memory.Allocator) 
TypedDecoder
+       BytesRequired(int) int
+}
+
+// NewDecoder constructs a decoder for a given type and encoding
+func NewDecoder(t parquet.Type, e parquet.Encoding, descr *schema.Column, mem 
memory.Allocator) TypedDecoder {
+       traits := getDecodingTraits(t)
+       if traits == nil {
+               return nil
+       }
+
+       return traits.Decoder(e, descr, false, mem)
+}
+
+// NewDictDecoder is like NewDecoder but for dictionary encodings, panics if 
type is bool.

Review comment:
       I could definitely add a recover in the code above this that calls it, 
so the panic is converted into a returned error, or just return the error 
directly from here. I'll take a look and see which makes the most sense as far 
as code readability goes.

##########
File path: go/parquet/internal/encoding/decoder.go
##########
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+       "bytes"
+       "reflect"
+
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/debug"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/utils"
+       "github.com/apache/arrow/go/parquet/schema"
+)
+
+// DecoderTraits provides an interface for more easily interacting with types
+// to generate decoders for specific types.
+type DecoderTraits interface {
+       Decoder(parquet.Encoding, *schema.Column, bool, memory.Allocator) 
TypedDecoder
+       BytesRequired(int) int
+}
+
+// NewDecoder constructs a decoder for a given type and encoding
+func NewDecoder(t parquet.Type, e parquet.Encoding, descr *schema.Column, mem 
memory.Allocator) TypedDecoder {
+       traits := getDecodingTraits(t)
+       if traits == nil {
+               return nil
+       }
+
+       return traits.Decoder(e, descr, false, mem)
+}
+
+// NewDictDecoder is like NewDecoder but for dictionary encodings, panics if 
type is bool.
+//
+// if mem is nil, memory.DefaultAllocator will be used
+func NewDictDecoder(t parquet.Type, descr *schema.Column, mem 
memory.Allocator) DictDecoder {
+       traits := getDecodingTraits(t)
+       if traits == nil {
+               return nil
+       }
+
+       if mem == nil {
+               mem = memory.DefaultAllocator
+       }
+
+       return traits.Decoder(parquet.Encodings.RLEDict, descr, true, 
mem).(DictDecoder)
+}
+
+type decoder struct {
+       descr    *schema.Column
+       encoding format.Encoding
+       nvals    int
+       data     []byte
+       typeLen  int
+}
+
+// newDecoderBase constructs the base decoding object that is embedded in the
+// type specific decoders.
+func newDecoderBase(e format.Encoding, descr *schema.Column) decoder {
+       typeLen := -1
+       if descr != nil && descr.PhysicalType() == 
parquet.Types.FixedLenByteArray {
+               typeLen = int(descr.TypeLength())
+       }
+
+       return decoder{
+               descr:    descr,
+               encoding: e,
+               typeLen:  typeLen,
+       }
+}
+
+// SetData sets the data for decoding into the decoder to update the available
+// data bytes and number of values available.
+func (d *decoder) SetData(nvals int, data []byte) {
+       d.data = data
+       d.nvals = nvals
+}
+
+// ValuesLeft returns the number of remaining values that can be decoded
+func (d *decoder) ValuesLeft() int { return d.nvals }
+
+// Encoding returns the encoding type used by this decoder to decode the bytes.
+func (d *decoder) Encoding() parquet.Encoding { return 
parquet.Encoding(d.encoding) }
+
+type dictDecoder struct {
+       decoder
+       mem              memory.Allocator
+       dictValueDecoder utils.DictionaryConverter
+       idxDecoder       *utils.RleDecoder
+}
+
+// SetDict sets a decoder that can be used to decode the dictionary that is
+// used for this column in order to return the proper values.
+func (d *dictDecoder) SetDict(dict TypedDecoder) {
+       if dict.Type() != d.descr.PhysicalType() {
+               panic("parquet: mismatch dictionary and column data type")
+       }
+
+       d.dictValueDecoder = NewDictConverter(dict)
+}
+
+// SetData sets the index value data into the decoder.
+func (d *dictDecoder) SetData(nvals int, data []byte) {
+       d.nvals = nvals
+       if len(data) == 0 {
+               d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data), 1)
+               return
+       }
+
+       width := uint8(data[0])
+       if width >= 64 {
+               panic("parquet: invalid or corrupted bit width")

Review comment:
       The common rule of thumb for panic vs. returning an error is to reserve 
panics for catastrophic, unrecoverable errors (such as programmer errors, e.g. 
dereferencing nil or going out of bounds on a slice), and to avoid using panics 
as a flow-control mechanism the way exceptions are used in Python. Panics 
should be for rare occurrences that are truly exceptional.
   
   However, you can also end up with extremely verbose code if everything you 
do has to handle an error output, and even the Go standard library packages 
will opt to use panics sometimes in order to make code cleaner or more 
readable.
   
   So I guess the real question here is: when we fail, do we want to propagate 
an error all the way up the stack, or should we just stick a recover at a 
higher level and throw panics from below?
   
   For now, this particular function is easy to convert to returning an error 
without impacting much readability or verbosity of the code.

##########
File path: go/parquet/internal/encoding/decoder.go
##########
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+       "bytes"
+       "reflect"
+
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/debug"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/utils"
+       "github.com/apache/arrow/go/parquet/schema"
+)
+
+// DecoderTraits provides an interface for more easily interacting with types
+// to generate decoders for specific types.
+type DecoderTraits interface {
+       Decoder(parquet.Encoding, *schema.Column, bool, memory.Allocator) 
TypedDecoder
+       BytesRequired(int) int
+}
+
+// NewDecoder constructs a decoder for a given type and encoding
+func NewDecoder(t parquet.Type, e parquet.Encoding, descr *schema.Column, mem 
memory.Allocator) TypedDecoder {
+       traits := getDecodingTraits(t)
+       if traits == nil {
+               return nil
+       }
+
+       return traits.Decoder(e, descr, false, mem)
+}
+
+// NewDictDecoder is like NewDecoder but for dictionary encodings, panics if 
type is bool.
+//
+// if mem is nil, memory.DefaultAllocator will be used
+func NewDictDecoder(t parquet.Type, descr *schema.Column, mem 
memory.Allocator) DictDecoder {
+       traits := getDecodingTraits(t)
+       if traits == nil {
+               return nil
+       }
+
+       if mem == nil {
+               mem = memory.DefaultAllocator
+       }
+
+       return traits.Decoder(parquet.Encodings.RLEDict, descr, true, 
mem).(DictDecoder)
+}
+
+type decoder struct {
+       descr    *schema.Column
+       encoding format.Encoding
+       nvals    int
+       data     []byte
+       typeLen  int
+}
+
+// newDecoderBase constructs the base decoding object that is embedded in the
+// type specific decoders.
+func newDecoderBase(e format.Encoding, descr *schema.Column) decoder {
+       typeLen := -1
+       if descr != nil && descr.PhysicalType() == 
parquet.Types.FixedLenByteArray {
+               typeLen = int(descr.TypeLength())
+       }
+
+       return decoder{
+               descr:    descr,
+               encoding: e,
+               typeLen:  typeLen,
+       }
+}
+
+// SetData sets the data for decoding into the decoder to update the available
+// data bytes and number of values available.
+func (d *decoder) SetData(nvals int, data []byte) {
+       d.data = data
+       d.nvals = nvals
+}
+
+// ValuesLeft returns the number of remaining values that can be decoded
+func (d *decoder) ValuesLeft() int { return d.nvals }
+
+// Encoding returns the encoding type used by this decoder to decode the bytes.
+func (d *decoder) Encoding() parquet.Encoding { return 
parquet.Encoding(d.encoding) }
+
+type dictDecoder struct {
+       decoder
+       mem              memory.Allocator
+       dictValueDecoder utils.DictionaryConverter
+       idxDecoder       *utils.RleDecoder
+}
+
+// SetDict sets a decoder that can be used to decode the dictionary that is
+// used for this column in order to return the proper values.
+func (d *dictDecoder) SetDict(dict TypedDecoder) {
+       if dict.Type() != d.descr.PhysicalType() {
+               panic("parquet: mismatch dictionary and column data type")
+       }
+
+       d.dictValueDecoder = NewDictConverter(dict)
+}
+
+// SetData sets the index value data into the decoder.
+func (d *dictDecoder) SetData(nvals int, data []byte) {
+       d.nvals = nvals
+       if len(data) == 0 {
+               d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data), 1)
+               return
+       }
+
+       width := uint8(data[0])
+       if width >= 64 {
+               panic("parquet: invalid or corrupted bit width")
+       }
+
+       d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data[1:]), 
int(width))

Review comment:
       Yeah, the first byte is the bit width, so after reading it we slice it 
off and hand the decoder only the actual data to decode, without the width 
byte. I'll add a comment.

##########
File path: go/parquet/internal/encoding/decoder.go
##########
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+       "bytes"
+       "reflect"
+
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/debug"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/utils"
+       "github.com/apache/arrow/go/parquet/schema"
+)
+
+// DecoderTraits provides an interface for more easily interacting with types
+// to generate decoders for specific types.
+type DecoderTraits interface {
+       Decoder(parquet.Encoding, *schema.Column, bool, memory.Allocator) 
TypedDecoder
+       BytesRequired(int) int
+}
+
+// NewDecoder constructs a decoder for a given type and encoding
+func NewDecoder(t parquet.Type, e parquet.Encoding, descr *schema.Column, mem 
memory.Allocator) TypedDecoder {
+       traits := getDecodingTraits(t)
+       if traits == nil {
+               return nil
+       }
+
+       return traits.Decoder(e, descr, false, mem)
+}
+
+// NewDictDecoder is like NewDecoder but for dictionary encodings, panics if 
type is bool.
+//
+// if mem is nil, memory.DefaultAllocator will be used
+func NewDictDecoder(t parquet.Type, descr *schema.Column, mem 
memory.Allocator) DictDecoder {
+       traits := getDecodingTraits(t)
+       if traits == nil {
+               return nil
+       }
+
+       if mem == nil {
+               mem = memory.DefaultAllocator
+       }
+
+       return traits.Decoder(parquet.Encodings.RLEDict, descr, true, 
mem).(DictDecoder)
+}
+
+type decoder struct {
+       descr    *schema.Column
+       encoding format.Encoding
+       nvals    int
+       data     []byte
+       typeLen  int
+}
+
+// newDecoderBase constructs the base decoding object that is embedded in the
+// type specific decoders.
+func newDecoderBase(e format.Encoding, descr *schema.Column) decoder {
+       typeLen := -1
+       if descr != nil && descr.PhysicalType() == 
parquet.Types.FixedLenByteArray {
+               typeLen = int(descr.TypeLength())
+       }
+
+       return decoder{
+               descr:    descr,
+               encoding: e,
+               typeLen:  typeLen,
+       }
+}
+
+// SetData sets the data for decoding into the decoder to update the available
+// data bytes and number of values available.
+func (d *decoder) SetData(nvals int, data []byte) {
+       d.data = data
+       d.nvals = nvals
+}
+
+// ValuesLeft returns the number of remaining values that can be decoded
+func (d *decoder) ValuesLeft() int { return d.nvals }
+
+// Encoding returns the encoding type used by this decoder to decode the bytes.
+func (d *decoder) Encoding() parquet.Encoding { return 
parquet.Encoding(d.encoding) }
+
+type dictDecoder struct {
+       decoder
+       mem              memory.Allocator
+       dictValueDecoder utils.DictionaryConverter
+       idxDecoder       *utils.RleDecoder
+}
+
+// SetDict sets a decoder that can be used to decode the dictionary that is
+// used for this column in order to return the proper values.
+func (d *dictDecoder) SetDict(dict TypedDecoder) {
+       if dict.Type() != d.descr.PhysicalType() {
+               panic("parquet: mismatch dictionary and column data type")
+       }
+
+       d.dictValueDecoder = NewDictConverter(dict)
+}
+
+// SetData sets the index value data into the decoder.
+func (d *dictDecoder) SetData(nvals int, data []byte) {
+       d.nvals = nvals
+       if len(data) == 0 {
+               d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data), 1)

Review comment:
       That's actually a good point; I think the bit width can safely be 0 here.

##########
File path: go/parquet/internal/encoding/delta_bit_packing.go
##########
@@ -0,0 +1,515 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+       "bytes"
+       "math"
+       "math/bits"
+       "reflect"
+
+       "github.com/apache/arrow/go/arrow"
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/utils"
+       "golang.org/x/xerrors"
+)
+
+// see the deltaBitPack encoder for a description of the encoding format that 
is
+// used for delta-bitpacking.
+type deltaBitPackDecoder struct {
+       decoder
+
+       mem memory.Allocator
+
+       usedFirst            bool
+       bitdecoder           *utils.BitReader
+       blockSize            uint64
+       currentBlockVals     uint64
+       miniBlocks           uint64
+       valsPerMini          uint64
+       currentMiniBlockVals uint64
+       minDelta             int64
+       miniBlockIdx         uint64
+
+       deltaBitWidths *memory.Buffer
+       deltaBitWidth  byte
+
+       lastVal int64
+}
+
+// returns the number of bytes read so far
+func (d *deltaBitPackDecoder) bytesRead() int64 {
+       return d.bitdecoder.CurOffset()
+}
+
+func (d *deltaBitPackDecoder) Allocator() memory.Allocator { return d.mem }
+
+// SetData sets the bytes and the expected number of values to decode
+// into the decoder, updating the decoder and allowing it to be reused.
+func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) {
+       // set our data into the underlying decoder for the type
+       d.decoder.SetData(nvalues, data)
+       // create a bit reader for our decoder's values
+       d.bitdecoder = utils.NewBitReader(bytes.NewReader(d.data))
+       d.currentBlockVals = 0
+       d.currentMiniBlockVals = 0
+       if d.deltaBitWidths == nil {
+               d.deltaBitWidths = memory.NewResizableBuffer(d.mem)
+       }
+
+       var ok bool
+       d.blockSize, ok = d.bitdecoder.GetVlqInt()
+       if !ok {
+               panic("parquet: eof exception")
+       }
+
+       if d.miniBlocks, ok = d.bitdecoder.GetVlqInt(); !ok {
+               panic("parquet: eof exception")
+       }
+
+       var totalValues uint64
+       if totalValues, ok = d.bitdecoder.GetVlqInt(); !ok {
+               panic("parquet: eof exception")
+       }
+
+       if int(totalValues) != d.nvals {
+               panic("parquet: mismatch between number of values and count in 
data header")
+       }
+
+       if d.lastVal, ok = d.bitdecoder.GetZigZagVlqInt(); !ok {
+               panic("parquet: eof exception")
+       }
+
+       d.valsPerMini = uint64(d.blockSize / d.miniBlocks)

Review comment:
       Huh, I didn't know that. I didn't see anything in the spec other than 
that it would be a multiple of 32. I'll change it to a uint32 then, thanks!

##########
File path: go/parquet/internal/encoding/delta_byte_array.go
##########
@@ -0,0 +1,203 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/internal/utils"
+       "golang.org/x/xerrors"
+)
+
+// DeltaByteArrayEncoder is an encoder for writing bytearrays which are delta 
encoded
+// this is also known as incremental encoding or front compression. For each 
element
+// in a sequence of strings, we store the prefix length of the previous entry 
plus the suffix
+// see https://en.wikipedia.org/wiki/Incremental_encoding for a longer 
description.
+//
+// This is stored as a sequence of delta-encoded prefix lengths followed by 
the suffixes
+// encoded as delta length byte arrays.
+type DeltaByteArrayEncoder struct {
+       encoder
+
+       prefixEncoder *DeltaBitPackInt32Encoder
+       suffixEncoder *DeltaLengthByteArrayEncoder
+
+       lastVal parquet.ByteArray
+}
+
+func (enc *DeltaByteArrayEncoder) initEncoders() {
+       enc.prefixEncoder = &DeltaBitPackInt32Encoder{
+               deltaBitPackEncoder: &deltaBitPackEncoder{encoder: 
newEncoderBase(enc.encoding, nil, enc.mem)}}
+       enc.suffixEncoder = &DeltaLengthByteArrayEncoder{
+               newEncoderBase(enc.encoding, nil, enc.mem),
+               &DeltaBitPackInt32Encoder{
+                       deltaBitPackEncoder: &deltaBitPackEncoder{encoder: 
newEncoderBase(enc.encoding, nil, enc.mem)}}}
+}
+
+// Type returns the underlying physical type this operates on, in this case 
ByteArrays only
+func (DeltaByteArrayEncoder) Type() parquet.Type { return 
parquet.Types.ByteArray }
+
+// Put writes a slice of ByteArrays to the encoder
+func (enc *DeltaByteArrayEncoder) Put(in []parquet.ByteArray) {
+       if len(in) == 0 {
+               return
+       }
+
+       var suf [1]parquet.ByteArray
+       if enc.prefixEncoder == nil { // initialize our encoders if we haven't 
yet
+               enc.initEncoders()
+               enc.prefixEncoder.Put([]int32{0})
+               suf[0] = in[0]
+               enc.lastVal = append([]byte(nil), in[0]...)
+               enc.suffixEncoder.Put(suf[:])
+               in = in[1:]
+       }
+
+       // for each value, figure out the common prefix with the previous value
+       // and then write the prefix length and the suffix.
+       for _, val := range in {
+               l1 := enc.lastVal.Len()
+               l2 := val.Len()
+               j := 0
+               for j < l1 && j < l2 {
+                       if enc.lastVal[j] != val[j] {
+                               break
+                       }
+                       j++
+               }
+               enc.prefixEncoder.Put([]int32{int32(j)})
+               suf[0] = val[j:]

Review comment:
       Because the `DeltaLengthByteArrayEncoder` only takes a slice of 
`ByteArray`s rather than having a function which can take a single 
`ByteArray`, I create an array of a single `ByteArray` and pass it to the 
suffix encoder by slicing it: `suf[:]`. Since it is an array of one 
`ByteArray`, I need to index into it using `[0]`.
   
   All of that being said, as I wrote that I realized that it doesn't actually 
save me the creation of the slice to pass into the encoder, since it still 
ends up creating a slice when I do `suf[:]`, so I might as well just use a 
single `parquet.ByteArray` and make it more obvious, rather than the 
roundabout way it currently works.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [arrow] zeroshade commented on a change in pull request #10379: ARROW-12851: [Go][Parquet] Add Golang Parquet encoding package

Reply via email to