This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push: new d560d1f refactor: update linter and run it (#419) d560d1f is described below commit d560d1f1d811e1e47da04e53395913144cd9ae5a Author: Matt Topol <zotthewiz...@gmail.com> AuthorDate: Fri Jun 20 16:43:36 2025 -0400 refactor: update linter and run it (#419) ### Rationale for this change Update golangci-lint to v2 and run it for cleanup. --- .golangci.yaml | 30 ++++-- .pre-commit-config.yaml | 2 +- arrow/array/binary.go | 52 +++++----- arrow/array/binarybuilder.go | 22 ++-- arrow/array/boolean.go | 4 +- arrow/array/booleanbuilder.go | 8 +- arrow/array/bufferbuilder_byte.go | 2 +- arrow/array/compare.go | 4 +- arrow/array/concat_test.go | 10 +- arrow/array/decimal.go | 12 +-- arrow/array/dictionary.go | 18 ++-- arrow/array/encoded.go | 2 +- arrow/array/fixed_size_list.go | 12 +-- arrow/array/fixedsize_binary.go | 2 +- arrow/array/fixedsize_binarybuilder.go | 12 +-- arrow/array/float16.go | 4 +- arrow/array/float16_builder.go | 6 +- arrow/array/interval.go | 30 +++--- arrow/array/list.go | 40 ++++---- arrow/array/null.go | 8 +- arrow/array/numeric_generic.go | 4 +- arrow/array/string.go | 60 +++++------ arrow/array/struct.go | 8 +- arrow/array/timestamp.go | 10 +- arrow/avro/reader_test.go | 2 +- arrow/avro/reader_types.go | 8 +- arrow/avro/schema.go | 4 +- arrow/avro/schema_test.go | 2 +- arrow/cdata/cdata.go | 11 +- arrow/compute/exec.go | 2 +- arrow/compute/exec_internals_test.go | 2 +- arrow/compute/executor.go | 6 +- arrow/compute/expression.go | 4 +- arrow/compute/exprs/types.go | 12 +-- arrow/compute/functions.go | 2 +- arrow/compute/scalar_compare_test.go | 2 +- arrow/csv/reader.go | 4 +- arrow/datatype_extension_test.go | 2 +- arrow/decimal256/decimal256_test.go | 2 +- arrow/extensions/json.go | 2 +- arrow/extensions/variant.go | 7 +- arrow/flight/client.go | 4 +- arrow/float16/float16.go | 6 +- arrow/internal/arrjson/arrjson.go | 8 +- arrow/internal/testing/gen/random_array_gen.go | 137 +++++++------------------ arrow/ipc/compression.go | 2 +- arrow/scalar/append_test.go | 18 ++-- arrow/scalar/scalar_test.go | 2 +- arrow/util/protobuf_reflect.go | 10 +- internal/bitutils/bit_block_counter_test.go | 4 +- internal/bitutils/bitmap_generate.go | 8 +- internal/bitutils/bitmap_generate_test.go | 6 +- internal/types/extension_types.go | 4 +- parquet/encryption_write_config_test.go | 6 +- parquet/file/column_reader_test.go | 13 +-- parquet/file/file_reader_mmap.go | 4 +- parquet/file/level_conversion.go | 6 +- parquet/file/record_reader.go | 4 +- parquet/internal/encoding/decoder.go | 2 +- parquet/internal/testutils/random.go | 98 ++++++++++-------- parquet/internal/testutils/random_arrow.go | 28 ++--- parquet/internal/utils/bit_reader_test.go | 18 ++-- parquet/metadata/page_index.go | 10 +- parquet/pqarrow/path_builder.go | 8 +- parquet/pqarrow/reader_writer_test.go | 6 +- parquet/schema/reflection.go | 12 ++- 66 files changed, 414 insertions(+), 446 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index f850dc7..36ed5f0 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -15,15 +15,31 @@ # specific language governing permissions and limitations # under the License. +version: "2" linters: - # Disable all linters. - # Default: false - disable-all: true - # Enable specific linter - # https://golangci-lint.run/usage/linters/#enabled-by-default + default: none enable: - - gofmt - - goimports - staticcheck + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + paths: + - third_party$ + - builtin$ + - examples$ issues: fix: true +formatters: + enable: + - gofmt + - goimports + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 24ee353..b4df0ce 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: - repo: https://github.com/golangci/golangci-lint - rev: v1.61.0 + rev: v2.1.6 hooks: # no built-in support for multiple go.mod # https://github.com/golangci/golangci-lint/issues/828 diff --git a/arrow/array/binary.go b/arrow/array/binary.go index 5fef60e..942fe30 100644 --- a/arrow/array/binary.go +++ b/arrow/array/binary.go @@ -52,10 +52,10 @@ func NewBinaryData(data arrow.ArrayData) *Binary { // Value returns the slice at index i. This value should not be mutated. func (a *Binary) Value(i int) []byte { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - idx := a.array.data.offset + i + idx := a.data.offset + i return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] } @@ -75,10 +75,10 @@ func (a *Binary) ValueString(i int) string { } func (a *Binary) ValueOffset(i int) int { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - return int(a.valueOffsets[a.array.data.offset+i]) + return int(a.valueOffsets[a.data.offset+i]) } func (a *Binary) ValueOffset64(i int) int64 { @@ -86,22 +86,22 @@ func (a *Binary) ValueOffset64(i int) int64 { } func (a *Binary) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - beg := a.array.data.offset + i + beg := a.data.offset + i return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) } func (a *Binary) ValueOffsets() []int32 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 + beg := a.data.offset + end := beg + a.data.length + 1 return a.valueOffsets[beg:end] } func (a *Binary) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] } @@ -138,11 +138,11 @@ func (a *Binary) setData(data *Data) { a.valueOffsets = arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes()) } - if a.array.data.length < 1 { + if a.data.length < 1 { return } - expNumOffsets := a.array.data.offset + a.array.data.length + 1 + expNumOffsets := a.data.offset + a.data.length + 1 if len(a.valueOffsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: binary offset buffer must have at least %d values", expNumOffsets)) } @@ -195,10 +195,10 @@ func NewLargeBinaryData(data arrow.ArrayData) *LargeBinary { } func (a *LargeBinary) Value(i int) []byte { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - idx := a.array.data.offset + i + idx := a.data.offset + i return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] } @@ -215,10 +215,10 @@ func (a *LargeBinary) ValueString(i int) string { } func (a *LargeBinary) ValueOffset(i int) int64 { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - return a.valueOffsets[a.array.data.offset+i] + return a.valueOffsets[a.data.offset+i] } func (a *LargeBinary) ValueOffset64(i int) int64 { @@ -226,22 +226,22 @@ func (a *LargeBinary) ValueOffset64(i int) int64 { } func (a *LargeBinary) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - beg := a.array.data.offset + i + beg := a.data.offset + i return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) } func (a *LargeBinary) ValueOffsets() []int64 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 + beg := a.data.offset + end := beg + a.data.length + 1 return a.valueOffsets[beg:end] } func (a *LargeBinary) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] } @@ -278,11 +278,11 @@ func (a *LargeBinary) setData(data *Data) { a.valueOffsets = arrow.Int64Traits.CastFromBytes(valueOffsets.Bytes()) } - if a.array.data.length < 1 { + if a.data.length < 1 { return } - expNumOffsets := a.array.data.offset + a.array.data.length + 1 + expNumOffsets := a.data.offset + a.data.length + 1 if len(a.valueOffsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: large binary offset buffer must have at least %d values", expNumOffsets)) } @@ -353,10 +353,10 @@ func (a *BinaryView) setData(data *Data) { } func (a *BinaryView) ValueHeader(i int) *arrow.ViewHeader { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - return &a.values[a.array.data.offset+i] + return &a.values[a.data.offset+i] } func (a *BinaryView) Value(i int) []byte { diff --git a/arrow/array/binarybuilder.go b/arrow/array/binarybuilder.go index b37a103..28188d7 100644 --- a/arrow/array/binarybuilder.go +++ b/arrow/array/binarybuilder.go @@ -81,7 +81,7 @@ func NewBinaryBuilder(mem memory.Allocator, dtype arrow.BinaryDataType) *BinaryB offsetByteWidth: offsetByteWidth, getOffsetVal: getOffsetVal, } - bb.builder.refCount.Add(1) + bb.refCount.Add(1) return bb } @@ -162,7 +162,7 @@ func (b *BinaryBuilder) AppendValues(v [][]byte, valid []bool) { b.values.Append(vv) } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } // AppendStringValues will append the values in the v slice. The valid slice determines which values @@ -183,7 +183,7 @@ func (b *BinaryBuilder) AppendStringValues(v []string, valid []bool) { b.values.Append([]byte(vv)) } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *BinaryBuilder) UnsafeAppend(v []byte) { @@ -218,7 +218,7 @@ func (b *BinaryBuilder) DataCap() int { return b.values.capacity } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *BinaryBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // ReserveData ensures there is enough space for appending n bytes @@ -236,7 +236,7 @@ func (b *BinaryBuilder) Resize(n int) { if (n * b.offsetByteWidth) < b.offsets.Len() { b.offsets.SetLength(n * b.offsetByteWidth) } - b.builder.resize(n, b.init) + b.resize(n, b.init) } func (b *BinaryBuilder) ResizeData(n int) { @@ -291,7 +291,7 @@ func (b *BinaryBuilder) newData() (data *Data) { values.Release() } - b.builder.reset() + b.reset() return } @@ -397,7 +397,7 @@ func NewBinaryViewBuilder(mem memory.Allocator) *BinaryViewBuilder { mem: mem, }, } - bvb.builder.refCount.Add(1) + bvb.refCount.Add(1) bvb.blockBuilder.refCount.Add(1) return bvb } @@ -445,7 +445,7 @@ func (b *BinaryViewBuilder) Resize(n int) { return } - b.builder.resize(nbuild, b.init) + b.resize(nbuild, b.init) b.data.Resize(arrow.ViewHeaderTraits.BytesRequired(n)) b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes()) } @@ -459,7 +459,7 @@ func (b *BinaryViewBuilder) ReserveData(length int) { } func (b *BinaryViewBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } func (b *BinaryViewBuilder) Append(v []byte) { @@ -554,7 +554,7 @@ func (b *BinaryViewBuilder) AppendValues(v [][]byte, valid []bool) { } } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *BinaryViewBuilder) AppendStringValues(v []string, valid []bool) { @@ -587,7 +587,7 @@ func (b *BinaryViewBuilder) AppendStringValues(v []string, valid []bool) { } } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } // AppendValueFromString is paired with ValueStr for fulfilling the diff --git a/arrow/array/boolean.go b/arrow/array/boolean.go index 1b28a9f..d579fa0 100644 --- a/arrow/array/boolean.go +++ b/arrow/array/boolean.go @@ -50,10 +50,10 @@ func NewBooleanData(data arrow.ArrayData) *Boolean { } func (a *Boolean) Value(i int) bool { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - return bitutil.BitIsSet(a.values, a.array.data.offset+i) + return bitutil.BitIsSet(a.values, a.data.offset+i) } func (a *Boolean) ValueStr(i int) string { diff --git a/arrow/array/booleanbuilder.go b/arrow/array/booleanbuilder.go index a277ffd..fddc51b 100644 --- a/arrow/array/booleanbuilder.go +++ b/arrow/array/booleanbuilder.go @@ -38,7 +38,7 @@ type BooleanBuilder struct { func NewBooleanBuilder(mem memory.Allocator) *BooleanBuilder { bb := &BooleanBuilder{builder: builder{mem: mem}} - bb.builder.refCount.Add(1) + bb.refCount.Add(1) return bb } @@ -131,7 +131,7 @@ func (b *BooleanBuilder) AppendValues(v []bool, valid []bool) { for i, vv := range v { bitutil.SetBitTo(b.rawData, b.length+i, vv) } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *BooleanBuilder) init(capacity int) { @@ -146,7 +146,7 @@ func (b *BooleanBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *BooleanBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), @@ -159,7 +159,7 @@ func (b *BooleanBuilder) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(n, b.init) + b.resize(n, b.init) b.data.Resize(arrow.BooleanTraits.BytesRequired(n)) b.rawData = b.data.Bytes() } diff --git a/arrow/array/bufferbuilder_byte.go b/arrow/array/bufferbuilder_byte.go index 61431b7..7502556 100644 --- a/arrow/array/bufferbuilder_byte.go +++ b/arrow/array/bufferbuilder_byte.go @@ -24,7 +24,7 @@ type byteBufferBuilder struct { func newByteBufferBuilder(mem memory.Allocator) *byteBufferBuilder { bbb := &byteBufferBuilder{bufferBuilder: bufferBuilder{mem: mem}} - bbb.bufferBuilder.refCount.Add(1) + bbb.refCount.Add(1) return bbb } diff --git a/arrow/array/compare.go b/arrow/array/compare.go index fda15f5..0278b4c 100644 --- a/arrow/array/compare.go +++ b/arrow/array/compare.go @@ -72,7 +72,7 @@ func RecordApproxEqual(left, right arrow.Record, opts ...EqualOption) bool { func chunkedBinaryApply(left, right *arrow.Chunked, fn func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool) { var ( pos int64 - length int64 = int64(left.Len()) + length = int64(left.Len()) leftIdx, rightIdx int leftPos, rightPos int64 ) @@ -118,7 +118,7 @@ func ChunkedEqual(left, right *arrow.Chunked) bool { return false } - var isequal bool = true + var isequal = true chunkedBinaryApply(left, right, func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { isequal = SliceEqual(left, lbeg, lend, right, rbeg, rend) return isequal diff --git a/arrow/array/concat_test.go b/arrow/array/concat_test.go index 4eb0c66..0a8d544 100644 --- a/arrow/array/concat_test.go +++ b/arrow/array/concat_test.go @@ -19,6 +19,7 @@ package array_test import ( "fmt" "math" + "math/rand/v2" "sort" "strings" "testing" @@ -31,7 +32,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "golang.org/x/exp/rand" ) func TestConcatenateValueBuffersNull(t *testing.T) { @@ -298,9 +298,9 @@ func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer, lengt func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { offsets := make([]int32, slicecount+1) - dist := rand.New(rand.NewSource(cts.seed)) + dist := rand.New(rand.NewPCG(cts.seed, 0)) for i := range offsets { - offsets[i] = dist.Int31n(length + 1) + offsets[i] = dist.Int32N(length + 1) } sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) return offsets @@ -308,9 +308,9 @@ func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { func (cts *ConcatTestSuite) largeoffsets(length int64, slicecount int32) []int64 { offsets := make([]int64, slicecount+1) - dist := rand.New(rand.NewSource(cts.seed)) + dist := rand.New(rand.NewPCG(cts.seed, 0)) for i := range offsets { - offsets[i] = dist.Int63n(length + 1) + offsets[i] = dist.Int64N(length + 1) } sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) return offsets diff --git a/arrow/array/decimal.go b/arrow/array/decimal.go index dff0fea..993242b 100644 --- a/arrow/array/decimal.go +++ b/arrow/array/decimal.go @@ -83,8 +83,8 @@ func (a *baseDecimal[T]) setData(data *Data) { vals := data.buffers[1] if vals != nil { a.values = arrow.GetData[T](vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length a.values = a.values[beg:end] } } @@ -187,7 +187,7 @@ func newDecimalBuilder[T interface { builder: builder{mem: mem}, dtype: dtype, } - bdb.builder.refCount.Add(1) + bdb.refCount.Add(1) return bdb } @@ -263,7 +263,7 @@ func (b *baseDecimalBuilder[T]) AppendValues(v []T, valid []bool) { if len(v) > 0 { copy(b.rawData[b.length:], v) } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *baseDecimalBuilder[T]) init(capacity int) { @@ -276,7 +276,7 @@ func (b *baseDecimalBuilder[T]) init(capacity int) { } func (b *baseDecimalBuilder[T]) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } func (b *baseDecimalBuilder[T]) Resize(n int) { @@ -288,7 +288,7 @@ func (b *baseDecimalBuilder[T]) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(nBuilder, b.init) + b.resize(nBuilder, b.init) b.data.Resize(b.traits.BytesRequired(n)) b.rawData = arrow.GetData[T](b.data.Bytes()) } diff --git a/arrow/array/dictionary.go b/arrow/array/dictionary.go index f71ca49..21dc962 100644 --- a/arrow/array/dictionary.go +++ b/arrow/array/dictionary.go @@ -63,7 +63,7 @@ type Dictionary struct { // and dictionary using the given type. func NewDictionaryArray(typ arrow.DataType, indices, dict arrow.Array) *Dictionary { a := &Dictionary{} - a.array.refCount.Add(1) + a.refCount.Add(1) dictdata := NewData(typ, indices.Len(), indices.Data().Buffers(), indices.Data().Children(), indices.NullN(), indices.Data().Offset()) dictdata.dictionary = dict.Data().(*Data) dict.Data().Retain() @@ -432,7 +432,7 @@ func createDictBuilder[T arrow.ValueType](mem memory.Allocator, idxbldr IndexBui dt: dt, }, } - ret.builder.refCount.Add(1) + ret.refCount.Add(1) if init != nil { if err := ret.InsertDictValues(init.(arrValues[T])); err != nil { @@ -451,7 +451,7 @@ func createBinaryDictBuilder(mem memory.Allocator, idxbldr IndexBuilder, memo ha dt: dt, }, } - ret.builder.refCount.Add(1) + ret.refCount.Add(1) if init != nil { switch v := init.(type) { @@ -479,7 +479,7 @@ func createFixedSizeDictBuilder[T fsbType](mem memory.Allocator, idxbldr IndexBu }, byteWidth: int(unsafe.Sizeof(z)), } - ret.builder.refCount.Add(1) + ret.refCount.Add(1) if init != nil { if err := ret.InsertDictValues(init.(arrValues[T])); err != nil { @@ -517,7 +517,7 @@ func NewDictionaryBuilderWithDict(mem memory.Allocator, dt *arrow.DictionaryType dt: dt, }, } - ret.builder.refCount.Add(1) + ret.refCount.Add(1) debug.Assert(init == nil, "arrow/array: doesn't make sense to init a null dictionary") return ret case arrow.UINT8: @@ -554,7 +554,7 @@ func NewDictionaryBuilderWithDict(mem memory.Allocator, dt *arrow.DictionaryType }, byteWidth: dt.ValueType.(*arrow.FixedSizeBinaryType).ByteWidth, } - ret.builder.refCount.Add(1) + ret.refCount.Add(1) if init != nil { if err = ret.InsertDictValues(init.(*FixedSizeBinary)); err != nil { @@ -656,7 +656,7 @@ func (b *dictionaryBuilder) Resize(n int) { } func (b *dictionaryBuilder) ResetFull() { - b.builder.reset() + b.reset() b.idxBuilder.NewArray().Release() b.memoTable.Reset() } @@ -1153,8 +1153,8 @@ func (b *FixedSizeBinaryDictionaryBuilder) Append(v []byte) error { func (b *FixedSizeBinaryDictionaryBuilder) InsertDictValues(arr *FixedSizeBinary) (err error) { var ( - beg = arr.array.data.offset * b.byteWidth - end = (arr.array.data.offset + arr.data.length) * b.byteWidth + beg = arr.data.offset * b.byteWidth + end = (arr.data.offset + arr.data.length) * b.byteWidth ) data := arr.valueBytes[beg:end] for len(data) > 0 { diff --git a/arrow/array/encoded.go b/arrow/array/encoded.go index 8e39090..2ce221f 100644 --- a/arrow/array/encoded.go +++ b/arrow/array/encoded.go @@ -312,7 +312,7 @@ func NewRunEndEncodedBuilder(mem memory.Allocator, runEnds, encoded arrow.DataTy maxRunEnd: maxEnd, lastUnmarshalled: nil, } - reb.builder.refCount.Add(1) + reb.refCount.Add(1) return reb } diff --git a/arrow/array/fixed_size_list.go b/arrow/array/fixed_size_list.go index 4a0524e..69cb67b 100644 --- a/arrow/array/fixed_size_list.go +++ b/arrow/array/fixed_size_list.go @@ -108,7 +108,7 @@ func (a *FixedSizeList) Len() int { return a.array.Len() } func (a *FixedSizeList) ValueOffsets(i int) (start, end int64) { n := int64(a.n) - off := int64(a.array.data.offset) + off := int64(a.data.offset) start, end = (off+int64(i))*n, (off+int64(i+1))*n return } @@ -177,7 +177,7 @@ func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType }, n, } - fslb.baseListBuilder.builder.refCount.Add(1) + fslb.refCount.Add(1) return fslb } @@ -194,7 +194,7 @@ func NewFixedSizeListBuilderWithField(mem memory.Allocator, n int32, field arrow n, } - fslb.baseListBuilder.builder.refCount.Add(1) + fslb.refCount.Add(1) return fslb } @@ -254,7 +254,7 @@ func (b *FixedSizeListBuilder) AppendEmptyValues(n int) { func (b *FixedSizeListBuilder) AppendValues(valid []bool) { b.Reserve(len(valid)) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) + b.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *FixedSizeListBuilder) unsafeAppendBoolToBitmap(isValid bool) { @@ -273,7 +273,7 @@ func (b *FixedSizeListBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *FixedSizeListBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), @@ -286,7 +286,7 @@ func (b *FixedSizeListBuilder) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(n, b.builder.init) + b.resize(n, b.builder.init) } } diff --git a/arrow/array/fixedsize_binary.go b/arrow/array/fixedsize_binary.go index a3b0380..31d507c 100644 --- a/arrow/array/fixedsize_binary.go +++ b/arrow/array/fixedsize_binary.go @@ -44,7 +44,7 @@ func NewFixedSizeBinaryData(data arrow.ArrayData) *FixedSizeBinary { // Value returns the fixed-size slice at index i. This value should not be mutated. func (a *FixedSizeBinary) Value(i int) []byte { - i += a.array.data.offset + i += a.data.offset var ( bw = int(a.bytewidth) beg = i * bw diff --git a/arrow/array/fixedsize_binarybuilder.go b/arrow/array/fixedsize_binarybuilder.go index ee7869f..ca5a10c 100644 --- a/arrow/array/fixedsize_binarybuilder.go +++ b/arrow/array/fixedsize_binarybuilder.go @@ -42,7 +42,7 @@ func NewFixedSizeBinaryBuilder(mem memory.Allocator, dtype *arrow.FixedSizeBinar dtype: dtype, values: newByteBufferBuilder(mem), } - b.builder.refCount.Add(1) + b.refCount.Add(1) return b } @@ -130,7 +130,7 @@ func (b *FixedSizeBinaryBuilder) AppendValues(v [][]byte, valid []bool) { } } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *FixedSizeBinaryBuilder) init(capacity int) { @@ -141,13 +141,13 @@ func (b *FixedSizeBinaryBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *FixedSizeBinaryBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *FixedSizeBinaryBuilder) Resize(n int) { - b.builder.resize(n, b.init) + b.resize(n, b.init) } // NewArray creates a FixedSizeBinary array from the memory buffers used by the @@ -173,7 +173,7 @@ func (b *FixedSizeBinaryBuilder) newData() (data *Data) { values.Release() } - b.builder.reset() + b.reset() return } @@ -223,7 +223,7 @@ func (b *FixedSizeBinaryBuilder) UnmarshalOne(dec *json.Decoder) error { if len(val) != b.dtype.ByteWidth { return &json.UnmarshalTypeError{ - Value: fmt.Sprint(val), + Value: fmt.Sprint(string(val)), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), diff --git a/arrow/array/float16.go b/arrow/array/float16.go index 5f57f72..4127680 100644 --- a/arrow/array/float16.go +++ b/arrow/array/float16.go @@ -71,8 +71,8 @@ func (a *Float16) setData(data *Data) { vals := data.buffers[1] if vals != nil { a.values = arrow.Float16Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length a.values = a.values[beg:end] } } diff --git a/arrow/array/float16_builder.go b/arrow/array/float16_builder.go index d4acd7f..60cc91b 100644 --- a/arrow/array/float16_builder.go +++ b/arrow/array/float16_builder.go @@ -121,7 +121,7 @@ func (b *Float16Builder) AppendValues(v []float16.Num, valid []bool) { if len(v) > 0 { arrow.Float16Traits.Copy(b.rawData[b.length:], v) } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *Float16Builder) init(capacity int) { @@ -136,7 +136,7 @@ func (b *Float16Builder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *Float16Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), @@ -150,7 +150,7 @@ func (b *Float16Builder) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(nBuilder, b.init) + b.resize(nBuilder, b.init) b.data.Resize(arrow.Float16Traits.BytesRequired(n)) b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes()) } diff --git a/arrow/array/interval.go b/arrow/array/interval.go index 54915cd..2900a59 100644 --- a/arrow/array/interval.go +++ b/arrow/array/interval.go @@ -88,8 +88,8 @@ func (a *MonthInterval) setData(data *Data) { vals := data.buffers[1] if vals != nil { a.values = arrow.MonthIntervalTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length a.values = a.values[beg:end] } } @@ -220,7 +220,7 @@ func (b *MonthIntervalBuilder) AppendValues(v []arrow.MonthInterval, valid []boo b.Reserve(len(v)) arrow.MonthIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *MonthIntervalBuilder) init(capacity int) { @@ -235,7 +235,7 @@ func (b *MonthIntervalBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *MonthIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), @@ -249,7 +249,7 @@ func (b *MonthIntervalBuilder) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(nBuilder, b.init) + b.resize(nBuilder, b.init) b.data.Resize(arrow.MonthIntervalTraits.BytesRequired(n)) b.rawData = arrow.MonthIntervalTraits.CastFromBytes(b.data.Bytes()) } @@ -392,8 +392,8 @@ func (a *DayTimeInterval) setData(data *Data) { vals := data.buffers[1] if vals != nil { a.values = arrow.DayTimeIntervalTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length a.values = a.values[beg:end] } } @@ -522,7 +522,7 @@ func (b *DayTimeIntervalBuilder) AppendValues(v []arrow.DayTimeInterval, valid [ b.Reserve(len(v)) arrow.DayTimeIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *DayTimeIntervalBuilder) init(capacity int) { @@ -537,7 +537,7 @@ func (b *DayTimeIntervalBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *DayTimeIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), @@ -551,7 +551,7 @@ func (b *DayTimeIntervalBuilder) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(nBuilder, b.init) + b.resize(nBuilder, b.init) b.data.Resize(arrow.DayTimeIntervalTraits.BytesRequired(n)) b.rawData = arrow.DayTimeIntervalTraits.CastFromBytes(b.data.Bytes()) } @@ -695,8 +695,8 @@ func (a *MonthDayNanoInterval) setData(data *Data) { vals := data.buffers[1] if vals != nil { a.values = arrow.MonthDayNanoIntervalTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length a.values = a.values[beg:end] } } @@ -827,7 +827,7 @@ func (b *MonthDayNanoIntervalBuilder) AppendValues(v []arrow.MonthDayNanoInterva b.Reserve(len(v)) arrow.MonthDayNanoIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *MonthDayNanoIntervalBuilder) init(capacity int) { @@ -842,7 +842,7 @@ func (b *MonthDayNanoIntervalBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *MonthDayNanoIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), @@ -856,7 +856,7 @@ func (b *MonthDayNanoIntervalBuilder) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(nBuilder, b.init) + b.resize(nBuilder, b.init) b.data.Resize(arrow.MonthDayNanoIntervalTraits.BytesRequired(n)) b.rawData = arrow.MonthDayNanoIntervalTraits.CastFromBytes(b.data.Bytes()) } diff --git a/arrow/array/list.go b/arrow/array/list.go index 806b89c..d87cc26 100644 --- a/arrow/array/list.go +++ b/arrow/array/list.go @@ -164,8 +164,8 @@ func (a *List) Release() { } func (a *List) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset + debug.Assert(i >= 0 && i < a.data.length, "index out of range") + j := i + a.data.offset start, end = int64(a.offsets[j]), int64(a.offsets[j+1]) return } @@ -286,8 +286,8 @@ func (a *LargeList) Len() int { return a.array.Len() } func (a *LargeList) Offsets() []int64 { return a.offsets } func (a *LargeList) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset + debug.Assert(i >= 0 && i < a.data.length, "index out of range") + j := i + a.data.offset start, end = a.offsets[j], a.offsets[j+1] return } @@ -468,13 +468,13 @@ func (b *baseListBuilder) AppendEmptyValues(n int) { func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) { b.Reserve(len(valid)) b.offsets.(*Int32Builder).AppendValues(offsets, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) + b.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *LargeListBuilder) AppendValues(offsets []int64, valid []bool) { b.Reserve(len(valid)) b.offsets.(*Int64Builder).AppendValues(offsets, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) + b.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *baseListBuilder) unsafeAppendBoolToBitmap(isValid bool) { @@ -494,7 +494,7 @@ func (b *baseListBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *baseListBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) + b.reserve(n, b.resizeHelper) b.offsets.Reserve(n) } @@ -513,7 +513,7 @@ func (b *baseListBuilder) resizeHelper(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(n, b.builder.init) + b.resize(n, b.builder.init) } } @@ -772,8 +772,8 @@ func (a *ListView) Release() { } func (a *ListView) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset + debug.Assert(i >= 0 && i < a.data.length, "index out of range") + j := i + a.data.offset size := int64(a.sizes[j]) // If size is 0, skip accessing offsets. if size == 0 { @@ -909,8 +909,8 @@ func (a *LargeListView) Offsets() []int64 { return a.offsets } func (a *LargeListView) Sizes() []int64 { return a.sizes } func (a *LargeListView) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset + debug.Assert(i >= 0 && i < a.data.length, "index out of range") + j := i + a.data.offset size := a.sizes[j] // If size is 0, skip accessing offsets. if size == 0 { @@ -1035,9 +1035,9 @@ func (a *array) validateOffsetsAndMaybeSizes(l offsetsAndSizes, offsetByteWidth } func (a *ListView) validate(fullValidation bool) error { - values := a.array.data.childData[0] + values := a.data.childData[0] offsetLimit := values.Len() - return a.array.validateOffsetsAndMaybeSizes(a, 4, true, int64(offsetLimit), fullValidation) + return a.validateOffsetsAndMaybeSizes(a, 4, true, int64(offsetLimit), fullValidation) } func (a *ListView) Validate() error { @@ -1049,9 +1049,9 @@ func (a *ListView) ValidateFull() error { } func (a *LargeListView) validate(fullValidation bool) error { - values := a.array.data.childData[0] + values := a.data.childData[0] offsetLimit := values.Len() - return a.array.validateOffsetsAndMaybeSizes(a, 8, true, int64(offsetLimit), fullValidation) + return a.validateOffsetsAndMaybeSizes(a, 8, true, int64(offsetLimit), fullValidation) } func (a *LargeListView) Validate() error { @@ -1238,14 +1238,14 @@ func (b *ListViewBuilder) AppendValuesWithSizes(offsets []int32, sizes []int32, b.Reserve(len(valid)) b.offsets.(*Int32Builder).AppendValues(offsets, nil) b.sizes.(*Int32Builder).AppendValues(sizes, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) + b.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *LargeListViewBuilder) AppendValuesWithSizes(offsets []int64, sizes []int64, valid []bool) { b.Reserve(len(valid)) b.offsets.(*Int64Builder).AppendValues(offsets, nil) b.sizes.(*Int64Builder).AppendValues(sizes, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) + b.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *baseListViewBuilder) unsafeAppendBoolToBitmap(isValid bool) { @@ -1266,7 +1266,7 @@ func (b *baseListViewBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *baseListViewBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) + b.reserve(n, b.resizeHelper) b.offsets.Reserve(n) b.sizes.Reserve(n) } @@ -1287,7 +1287,7 @@ func (b *baseListViewBuilder) resizeHelper(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(n, b.builder.init) + b.resize(n, b.builder.init) } } diff --git a/arrow/array/null.go b/arrow/array/null.go index 38b3b09..02ea12e 100644 --- a/arrow/array/null.go +++ b/arrow/array/null.go @@ -76,8 +76,8 @@ func (a *Null) String() string { func (a *Null) setData(data *Data) { a.array.setData(data) - a.array.nullBitmapBytes = nil - a.array.data.nulls = a.array.data.length + a.nullBitmapBytes = nil + a.data.nulls = a.data.length } func (a *Null) GetOneForMarshal(i int) interface{} { @@ -115,8 +115,8 @@ func (b *NullBuilder) Release() { } func (b *NullBuilder) AppendNull() { - b.builder.length++ - b.builder.nulls++ + b.length++ + b.nulls++ } func (b *NullBuilder) AppendNulls(n int) { diff --git a/arrow/array/numeric_generic.go b/arrow/array/numeric_generic.go index 016dc37..874e86d 100644 --- a/arrow/array/numeric_generic.go +++ b/arrow/array/numeric_generic.go @@ -68,8 +68,8 @@ func (a *numericArray[T]) setData(data *Data) { vals := data.buffers[1] if vals != nil { a.values = arrow.GetData[T](vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length a.values = a.values[beg:end] } } diff --git a/arrow/array/string.go b/arrow/array/string.go index d42492d..dd2e2d2 100644 --- a/arrow/array/string.go +++ b/arrow/array/string.go @@ -56,7 +56,7 @@ func (a *String) Reset(data arrow.ArrayData) { // Value returns the slice at index i. This value should not be mutated. func (a *String) Value(i int) string { - i = i + a.array.data.offset + i = i + a.data.offset return a.values[a.offsets[i]:a.offsets[i+1]] } @@ -69,10 +69,10 @@ func (a *String) ValueStr(i int) string { // ValueOffset returns the offset of the value at index i. func (a *String) ValueOffset(i int) int { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - return int(a.offsets[i+a.array.data.offset]) + return int(a.offsets[i+a.data.offset]) } func (a *String) ValueOffset64(i int) int64 { @@ -80,23 +80,23 @@ func (a *String) ValueOffset64(i int) int64 { } func (a *String) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - beg := a.array.data.offset + i + beg := a.data.offset + i return int(a.offsets[beg+1] - a.offsets[beg]) } func (a *String) ValueOffsets() []int32 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 + beg := a.data.offset + end := beg + a.data.length + 1 return a.offsets[beg:end] } func (a *String) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - if a.array.data.buffers[2] != nil { + beg := a.data.offset + end := beg + a.data.length + if a.data.buffers[2] != nil { return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] } return nil @@ -136,11 +136,11 @@ func (a *String) setData(data *Data) { a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes()) } - if a.array.data.length < 1 { + if a.data.length < 1 { return } - expNumOffsets := a.array.data.offset + a.array.data.length + 1 + expNumOffsets := a.data.offset + a.data.length + 1 if len(a.offsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) } @@ -203,7 +203,7 @@ func (a *LargeString) Reset(data arrow.ArrayData) { // Value returns the slice at index i. This value should not be mutated. func (a *LargeString) Value(i int) string { - i = i + a.array.data.offset + i = i + a.data.offset return a.values[a.offsets[i]:a.offsets[i+1]] } @@ -216,10 +216,10 @@ func (a *LargeString) ValueStr(i int) string { // ValueOffset returns the offset of the value at index i. func (a *LargeString) ValueOffset(i int) int64 { - if i < 0 || i > a.array.data.length { + if i < 0 || i > a.data.length { panic("arrow/array: index out of range") } - return a.offsets[i+a.array.data.offset] + return a.offsets[i+a.data.offset] } func (a *LargeString) ValueOffset64(i int) int64 { @@ -227,23 +227,23 @@ func (a *LargeString) ValueOffset64(i int) int64 { } func (a *LargeString) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - beg := a.array.data.offset + i + beg := a.data.offset + i return int(a.offsets[beg+1] - a.offsets[beg]) } func (a *LargeString) ValueOffsets() []int64 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 + beg := a.data.offset + end := beg + a.data.length + 1 return a.offsets[beg:end] } func (a *LargeString) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - if a.array.data.buffers[2] != nil { + beg := a.data.offset + end := beg + a.data.length + if a.data.buffers[2] != nil { return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] } return nil @@ -283,11 +283,11 @@ func (a *LargeString) setData(data *Data) { a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) } - if a.array.data.length < 1 { + if a.data.length < 1 { return } - expNumOffsets := a.array.data.offset + a.array.data.length + 1 + expNumOffsets := a.data.offset + a.data.length + 1 if len(a.offsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) } @@ -356,10 +356,10 @@ func (a *StringView) setData(data *Data) { } func (a *StringView) ValueHeader(i int) *arrow.ViewHeader { - if i < 0 || i >= a.array.data.length { + if i < 0 || i >= a.data.length { panic("arrow/array: index out of range") } - return &a.values[a.array.data.offset+i] + return &a.values[a.data.offset+i] } func (a *StringView) Value(i int) string { @@ -457,7 +457,7 @@ func (b *StringBuilder) Append(v string) { // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. func (b *StringBuilder) AppendValues(v []string, valid []bool) { - b.BinaryBuilder.AppendStringValues(v, valid) + b.AppendStringValues(v, valid) } // Value returns the string at index i. @@ -550,7 +550,7 @@ func (b *LargeStringBuilder) Append(v string) { // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. func (b *LargeStringBuilder) AppendValues(v []string, valid []bool) { - b.BinaryBuilder.AppendStringValues(v, valid) + b.AppendStringValues(v, valid) } // Value returns the string at index i. @@ -630,11 +630,11 @@ func NewStringViewBuilder(mem memory.Allocator) *StringViewBuilder { } func (b *StringViewBuilder) Append(v string) { - b.BinaryViewBuilder.AppendString(v) + b.AppendString(v) } func (b *StringViewBuilder) AppendValues(v []string, valid []bool) { - b.BinaryViewBuilder.AppendStringValues(v, valid) + b.AppendStringValues(v, valid) } func (b *StringViewBuilder) UnmarshalOne(dec *json.Decoder) error { diff --git a/arrow/array/struct.go b/arrow/array/struct.go index 957947b..e0902ee 100644 --- a/arrow/array/struct.go +++ b/arrow/array/struct.go @@ -312,7 +312,7 @@ func (b *StructBuilder) Append(v bool) { // the underlying children they already ensure they have enough space // reserved. The only thing we must do is ensure we have enough space in // the validity bitmap of the struct builder itself. - b.builder.reserve(1, b.resizeHelper) + b.reserve(1, b.resizeHelper) b.unsafeAppendBoolToBitmap(v) if !v { for _, f := range b.fields { @@ -323,7 +323,7 @@ func (b *StructBuilder) Append(v bool) { func (b *StructBuilder) AppendValues(valids []bool) { b.Reserve(len(valids)) - b.builder.unsafeAppendBoolsToBitmap(valids, len(valids)) + b.unsafeAppendBoolsToBitmap(valids, len(valids)) } func (b *StructBuilder) AppendNull() { b.Append(false) } @@ -363,7 +363,7 @@ func (b *StructBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *StructBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) + b.reserve(n, b.resizeHelper) for _, f := range b.fields { f.Reserve(n) } @@ -386,7 +386,7 @@ func (b *StructBuilder) resizeHelper(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(n, b.builder.init) + b.resize(n, b.builder.init) } } diff --git a/arrow/array/timestamp.go b/arrow/array/timestamp.go index 9f8ca47..ff9ff4b 100644 --- a/arrow/array/timestamp.go +++ b/arrow/array/timestamp.go @@ -81,8 +81,8 @@ func (a *Timestamp) setData(data *Data) { vals := data.buffers[1] if vals != nil { a.values = arrow.TimestampTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length + beg := a.data.offset + end := beg + a.data.length a.values = a.values[beg:end] } } @@ -221,7 +221,7 @@ func (b *TimestampBuilder) AppendValues(v []arrow.Timestamp, valid []bool) { b.Reserve(len(v)) arrow.TimestampTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) + b.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *TimestampBuilder) init(capacity int) { @@ -236,7 +236,7 @@ func (b *TimestampBuilder) init(capacity int) { // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *TimestampBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) + b.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), @@ -250,7 +250,7 @@ func (b *TimestampBuilder) Resize(n int) { if b.capacity == 0 { b.init(n) } else { - b.builder.resize(nBuilder, b.init) + b.resize(nBuilder, b.init) b.data.Resize(arrow.TimestampTraits.BytesRequired(n)) b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) } diff --git a/arrow/avro/reader_test.go b/arrow/avro/reader_test.go index 6dad0fd..1b07fbb 100644 --- a/arrow/avro/reader_test.go +++ b/arrow/avro/reader_test.go @@ -354,7 +354,7 @@ func TestEditSchemaStringEqual(t *testing.T) { if err != nil { t.Fatalf("%v", err) } - if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { + if fmt.Sprintf("%+v", want.String()) != fmt.Sprintf("%+v", got.String()) { t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) } else { t.Logf("schema.String() comparison passed") diff --git a/arrow/avro/reader_types.go b/arrow/avro/reader_types.go index d5c5127..50f0b18 100644 --- a/arrow/avro/reader_types.go +++ b/arrow/avro/reader_types.go @@ -249,7 +249,7 @@ func (d *dataLoader) loadDatum(data any) error { } func (d *dataLoader) newChild() *dataLoader { - var child *dataLoader = &dataLoader{ + var child = &dataLoader{ depth: d.depth + 1, } d.children = append(d.children, child) @@ -257,7 +257,7 @@ func (d *dataLoader) newChild() *dataLoader { } func (d *dataLoader) newListChild(list *fieldPos) *dataLoader { - var child *dataLoader = &dataLoader{ + var child = &dataLoader{ list: list, item: list.childrens[0], depth: d.depth + 1, @@ -267,7 +267,7 @@ func (d *dataLoader) newListChild(list *fieldPos) *dataLoader { } func (d *dataLoader) newMapChild(mapField *fieldPos) *dataLoader { - var child *dataLoader = &dataLoader{ + var child = &dataLoader{ mapField: mapField, depth: d.depth + 1, } @@ -296,7 +296,7 @@ func newFieldPos() *fieldPos { return &fieldPos{index: -1} } func (f *fieldPos) children() []*fieldPos { return f.childrens } func (f *fieldPos) newChild(childName string, childBuilder array.Builder, meta arrow.Metadata) *fieldPos { - var child fieldPos = fieldPos{ + var child = fieldPos{ parent: f, fieldName: childName, builder: childBuilder, diff --git a/arrow/avro/schema.go b/arrow/avro/schema.go index 26e373d..91b1729 100644 --- a/arrow/avro/schema.go +++ b/arrow/avro/schema.go @@ -106,7 +106,7 @@ func arrowSchemafromAvro(n *schemaNode) { k := strconv.FormatInt(int64(index), 10) symbols[k] = symbol } - var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} + var dt = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} sl := int64(len(symbols)) switch { case sl <= math.MaxUint8: @@ -204,7 +204,7 @@ func iterateFields(n *schemaNode) { k := strconv.FormatInt(int64(index), 10) symbols[k] = symbol } - var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} + var dt = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} sl := len(symbols) switch { case sl <= math.MaxUint8: diff --git a/arrow/avro/schema_test.go b/arrow/avro/schema_test.go index 8edea40..385bd4b 100644 --- a/arrow/avro/schema_test.go +++ b/arrow/avro/schema_test.go @@ -352,7 +352,7 @@ func TestSchemaStringEqual(t *testing.T) { if err != nil { t.Fatalf("%v", err) } - if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { + if fmt.Sprintf("%+v", want.String()) != fmt.Sprintf("%+v", got.String()) { t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) } else { t.Logf("schema.String() comparison passed") diff --git a/arrow/cdata/cdata.go b/arrow/cdata/cdata.go index 2e15edf..3ed0ca8 100644 --- a/arrow/cdata/cdata.go +++ b/arrow/cdata/cdata.go @@ -281,9 +281,10 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { case 'L': // large list dt = arrow.LargeListOfField(childFields[0]) case 'v': // list view/large list view - if f[2] == 'l' { + switch f[2] { + case 'l': dt = arrow.ListViewOfField(childFields[0]) - } else if f[2] == 'L' { + case 'L': dt = arrow.LargeListViewOfField(childFields[0]) } case 'w': // fixed size list is w:# where # is the list size. @@ -963,10 +964,10 @@ func (n *nativeCRecordBatchReader) Record() arrow.Record { return n.cur } func (n *nativeCRecordBatchReader) Next() bool { err := n.next() - switch { - case err == nil: + switch err { + case nil: return true - case err == io.EOF: + case io.EOF: return false } n.err = err diff --git a/arrow/compute/exec.go b/arrow/compute/exec.go index d37d95a..0afb3b1 100644 --- a/arrow/compute/exec.go +++ b/arrow/compute/exec.go @@ -150,7 +150,7 @@ func execInternal(ctx context.Context, fn Function, opts FunctionOptions, passed } case FuncVector: vkernel := k.(*exec.VectorKernel) - if !(allSame || !vkernel.CanExecuteChunkWise) { + if !allSame && vkernel.CanExecuteChunkWise { return nil, fmt.Errorf("%w: vector kernel arguments must all be the same length", arrow.ErrInvalid) } } diff --git a/arrow/compute/exec_internals_test.go b/arrow/compute/exec_internals_test.go index bbe4913..f813fb1 100644 --- a/arrow/compute/exec_internals_test.go +++ b/arrow/compute/exec_internals_test.go @@ -46,7 +46,7 @@ type ComputeInternalsTestSuite struct { func (c *ComputeInternalsTestSuite) SetupTest() { c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - c.rng = gen.NewRandomArrayGenerator(0, c.mem) + c.rng = gen.NewRandomArrayGenerator(1337, c.mem) c.resetCtx() } diff --git a/arrow/compute/executor.go b/arrow/compute/executor.go index bf41036..59f1c4e 100644 --- a/arrow/compute/executor.go +++ b/arrow/compute/executor.go @@ -249,7 +249,7 @@ func propagateNulls(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ArraySp var ( arrsWithNulls = make([]*exec.ArraySpan, 0, len(batch.Values)) isAllNull bool - prealloc bool = out.Buffers[0].Buf != nil + prealloc = out.Buffers[0].Buf != nil ) for i := range batch.Values { @@ -440,7 +440,7 @@ func (e *nonAggExecImpl) Init(ctx *exec.KernelCtx, args exec.KernelInitArgs) (er } func (e *nonAggExecImpl) prepareOutput(length int) *exec.ExecResult { - var nullCount int = array.UnknownNullCount + var nullCount = array.UnknownNullCount if e.kernel.GetNullHandling() == exec.NullNoOutput { nullCount = 0 @@ -761,7 +761,7 @@ func iterateExecSpans(batch *ExecBatch, maxChunkSize int64, promoteIfAllScalar b } var ( - args []Datum = batch.Values + args = batch.Values haveChunked bool chunkIdxes = make([]int, len(args)) valuePositions = make([]int64, len(args)) diff --git a/arrow/compute/expression.go b/arrow/compute/expression.go index 4e60d38..0de425f 100644 --- a/arrow/compute/expression.go +++ b/arrow/compute/expression.go @@ -822,8 +822,8 @@ func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, erro var ( getone func() (Expression, error) - index int = 0 - metadata = batch.Schema().Metadata() + index = 0 + metadata = batch.Schema().Metadata() ) getscalar := func(i string) (scalar.Scalar, error) { diff --git a/arrow/compute/exprs/types.go b/arrow/compute/exprs/types.go index 0c468f3..314df6f 100644 --- a/arrow/compute/exprs/types.go +++ b/arrow/compute/exprs/types.go @@ -320,9 +320,9 @@ func (e *extensionSet) GetArrowRegistry() *ExtensionIDRegistry { return e. func (e *extensionSet) GetSubstraitRegistry() expr.ExtensionRegistry { return e.ExtensionRegistry } func (e *extensionSet) DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) { - id, ok := e.Set.DecodeType(anchor) + id, ok := e.DecodeType(anchor) if !ok { - if id, ok = e.Set.DecodeTypeVariation(anchor); !ok { + if id, ok = e.DecodeTypeVariation(anchor); !ok { return id, nil, false } } @@ -332,7 +332,7 @@ func (e *extensionSet) DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.Data } func (e *extensionSet) DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) { - id, ok := e.Set.DecodeFunc(ref) + id, ok := e.DecodeFunc(ref) if !ok { return id, nil, false } @@ -353,7 +353,7 @@ func (e *extensionSet) EncodeTypeVariation(dt arrow.DataType) (extensions.ID, ui return extensions.ID{}, 0, false } - return id, e.Set.GetTypeVariationAnchor(id), true + return id, e.GetTypeVariationAnchor(id), true } func (e *extensionSet) EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) { @@ -362,11 +362,11 @@ func (e *extensionSet) EncodeType(dt arrow.DataType) (extensions.ID, uint32, boo return extensions.ID{}, 0, false } - return id, e.Set.GetTypeAnchor(id), true + return id, e.GetTypeAnchor(id), true } func (e *extensionSet) EncodeFunction(id extensions.ID) uint32 { - return e.Set.GetFuncAnchor(id) + return e.GetFuncAnchor(id) } // ExtensionIDRegistry manages a set of mappings between Arrow types diff --git a/arrow/compute/functions.go b/arrow/compute/functions.go index 11d8e68..84f9bbc 100644 --- a/arrow/compute/functions.go +++ b/arrow/compute/functions.go @@ -150,7 +150,7 @@ func (b *baseFunction) Validate() error { } argCount := len(b.doc.ArgNames) - if argCount != b.arity.NArgs && !(b.arity.IsVarArgs && argCount == b.arity.NArgs+1) { + if argCount != b.arity.NArgs && (!b.arity.IsVarArgs || argCount != b.arity.NArgs+1) { return fmt.Errorf("in function '%s': number of argument names for function doc != function arity", b.name) } diff --git a/arrow/compute/scalar_compare_test.go b/arrow/compute/scalar_compare_test.go index e45b3af..4b4d78b 100644 --- a/arrow/compute/scalar_compare_test.go +++ b/arrow/compute/scalar_compare_test.go @@ -1195,7 +1195,7 @@ func (c *CompareStringSuite) validateCompareComputed(op kernels.CompareOperator, } defer expected.Release() - c.CompareSuite.validateCompareDatum(op, lhs, rhs, expected) + c.validateCompareDatum(op, lhs, rhs, expected) } func (c *CompareStringSuite) TestRandomCompareArrayArray() { diff --git a/arrow/csv/reader.go b/arrow/csv/reader.go index 284964a..3d919a9 100644 --- a/arrow/csv/reader.go +++ b/arrow/csv/reader.go @@ -802,7 +802,7 @@ func (r *Reader) parseListLike(field array.ListLikeBuilder, str string) { field.AppendNull() return } - if !(strings.HasPrefix(str, "{") && strings.HasSuffix(str, "}")) { + if !strings.HasPrefix(str, "{") || !strings.HasSuffix(str, "}") { r.err = errors.New("invalid list format. should start with '{' and end with '}'") return } @@ -830,7 +830,7 @@ func (r *Reader) parseFixedSizeList(field *array.FixedSizeListBuilder, str strin field.AppendNull() return } - if !(strings.HasPrefix(str, "{") && strings.HasSuffix(str, "}")) { + if !strings.HasPrefix(str, "{") || !strings.HasSuffix(str, "}") { r.err = errors.New("invalid list format. should start with '{' and end with '}'") return } diff --git a/arrow/datatype_extension_test.go b/arrow/datatype_extension_test.go index ae246e0..9a3361c 100644 --- a/arrow/datatype_extension_test.go +++ b/arrow/datatype_extension_test.go @@ -42,7 +42,7 @@ func (BadExtensionType) Deserialize(_ arrow.DataType, _ string) (arrow.Extension func TestMustEmbedBase(t *testing.T) { var ext interface{} = &BadExtensionType{} assert.Panics(t, func() { - var _ arrow.ExtensionType = ext.(arrow.ExtensionType) + var _ = ext.(arrow.ExtensionType) }) } diff --git a/arrow/decimal256/decimal256_test.go b/arrow/decimal256/decimal256_test.go index 421b598..a93cdfe 100644 --- a/arrow/decimal256/decimal256_test.go +++ b/arrow/decimal256/decimal256_test.go @@ -601,7 +601,7 @@ func TestHexFromString(t *testing.T) { t.Errorf("expected: %s, actual: %s\n", decStr, num.ToString(38)) actualCoeff := num.BigInt() - expectedCoeff, _ := (&big.Int{}).SetString(strings.Replace(decStr, ".", "", -1), 10) + expectedCoeff, _ := (&big.Int{}).SetString(strings.ReplaceAll(decStr, ".", ""), 10) t.Errorf("expected(hex): %X, actual(hex): %X\n", expectedCoeff.Bytes(), actualCoeff.Bytes()) } } diff --git a/arrow/extensions/json.go b/arrow/extensions/json.go index 2324009..3f46b50 100644 --- a/arrow/extensions/json.go +++ b/arrow/extensions/json.go @@ -55,7 +55,7 @@ func NewJSONType(storageType arrow.DataType) (*JSONType, error) { func (b *JSONType) ArrayType() reflect.Type { return reflect.TypeOf(JSONArray{}) } func (b *JSONType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) { - if !(data == "" || data == "{}") { + if data != "" && data != "{}" { return nil, fmt.Errorf("serialized metadata for JSON extension type must be '' or '{}', found: %s", data) } return NewJSONType(storageType) diff --git a/arrow/extensions/variant.go b/arrow/extensions/variant.go index ff8aa1d..53de4ff 100644 --- a/arrow/extensions/variant.go +++ b/arrow/extensions/variant.go @@ -395,7 +395,7 @@ func (v *VariantArray) String() string { val, err := v.Value(i) if err != nil { - o.WriteString(fmt.Sprintf("error: %v", err)) + fmt.Fprintf(o, "error: %v", err) continue } @@ -1002,12 +1002,13 @@ func variantTypeFromArrow(dt arrow.DataType) variant.Type { case arrow.TIMESTAMP: dt := dt.(*arrow.TimestampType) isUTC := dt.TimeZone == "" || dt.TimeZone == "UTC" - if dt.Unit == arrow.Microsecond { + switch dt.Unit { + case arrow.Microsecond: if isUTC { return variant.TimestampMicros } return variant.TimestampMicrosNTZ - } else if dt.Unit == arrow.Nanosecond { + case arrow.Nanosecond: if isUTC { return variant.TimestampNanos } diff --git a/arrow/flight/client.go b/arrow/flight/client.go index e1860a1..9e5da87 100644 --- a/arrow/flight/client.go +++ b/arrow/flight/client.go @@ -310,7 +310,7 @@ func NewClientFromConn(cc grpc.ClientConnInterface, auth ClientAuthHandler) Clie func (c *client) AuthenticateBasicToken(ctx context.Context, username, password string, opts ...grpc.CallOption) (context.Context, error) { authCtx := metadata.AppendToOutgoingContext(ctx, "Authorization", "Basic "+base64.RawStdEncoding.EncodeToString([]byte(strings.Join([]string{username, password}, ":")))) - stream, err := c.FlightServiceClient.Handshake(authCtx, opts...) + stream, err := c.Handshake(authCtx, opts...) if err != nil { return ctx, err } @@ -346,7 +346,7 @@ func (c *client) Authenticate(ctx context.Context, opts ...grpc.CallOption) erro return status.Error(codes.NotFound, "cannot authenticate without an auth-handler") } - stream, err := c.FlightServiceClient.Handshake(ctx, opts...) + stream, err := c.Handshake(ctx, opts...) if err != nil { return err } diff --git a/arrow/float16/float16.go b/arrow/float16/float16.go index f61db40..0aa4df8 100644 --- a/arrow/float16/float16.go +++ b/arrow/float16/float16.go @@ -63,10 +63,10 @@ func (f Num) Float32() float32 { exp := (f.bits >> 10) & 0x1f res := uint32(exp) + 127 - 15 fc := uint32(f.bits & 0x3ff) - switch { - case exp == 0: + switch exp { + case 0: res = 0 - case exp == 0x1f: + case 0x1f: res = 0xff } return math.Float32frombits((sn << 31) | (res << 23) | (fc << 13)) diff --git a/arrow/internal/arrjson/arrjson.go b/arrow/internal/arrjson/arrjson.go index 2181ebd..a2c86ff 100644 --- a/arrow/internal/arrjson/arrjson.go +++ b/arrow/internal/arrjson/arrjson.go @@ -568,10 +568,10 @@ func (f *FieldWrapper) UnmarshalJSON(data []byte) error { if len(f.Metadata) > 0 { // unmarshal the key/value metadata pairs var ( - mdkeys = make([]string, 0, len(f.Metadata)) - mdvals = make([]string, 0, len(f.Metadata)) - extKeyIdx int = -1 - extDataIdx int = -1 + mdkeys = make([]string, 0, len(f.Metadata)) + mdvals = make([]string, 0, len(f.Metadata)) + extKeyIdx = -1 + extDataIdx = -1 ) for i, kv := range f.Metadata { diff --git a/arrow/internal/testing/gen/random_array_gen.go b/arrow/internal/testing/gen/random_array_gen.go index 7350454..aadc24a 100644 --- a/arrow/internal/testing/gen/random_array_gen.go +++ b/arrow/internal/testing/gen/random_array_gen.go @@ -18,13 +18,14 @@ package gen import ( "math" + "math/rand/v2" + "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" - "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) @@ -40,7 +41,7 @@ type RandomArrayGenerator struct { // NewRandomArrayGenerator constructs a new generator with the requested Seed func NewRandomArrayGenerator(seed uint64, mem memory.Allocator) RandomArrayGenerator { - src := rand.NewSource(seed) + src := rand.NewPCG(seed, 0) return RandomArrayGenerator{seed, 0, src, rand.New(src), mem} } @@ -54,7 +55,7 @@ func (r *RandomArrayGenerator) GenerateBitmap(buffer []byte, n int64, prob float // bernoulli distribution uses P to determine the probability of a 0 or a 1, // which we'll use to generate the bitmap. - dist := distuv.Bernoulli{P: 1 - prob, Src: rand.NewSource(r.seed + r.extra)} + dist := distuv.Bernoulli{P: 1 - prob, Src: rand.NewPCG(r.seed, r.extra)} for i := 0; int64(i) < n; i++ { if dist.Rand() != float64(0.0) { bitutil.SetBit(buffer, i) @@ -99,112 +100,46 @@ func (r *RandomArrayGenerator) baseGenPrimitive(size int64, prob float64, byteWi return buffers, nullCount } -func (r *RandomArrayGenerator) Int8(size int64, min, max int8, prob float64) arrow.Array { - buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int8SizeBytes) +func genArray[T arrow.IntType | arrow.UintType](r *RandomArrayGenerator, size int64, min, max T, prob float64) arrow.Array { + buffers, nullcount := r.baseGenPrimitive(size, prob, int(unsafe.Sizeof(T(0)))) for _, b := range buffers { defer b.Release() } r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) - out := arrow.Int8Traits.CastFromBytes(buffers[1].Bytes()) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) + out := arrow.GetData[T](buffers[1].Bytes()) for i := int64(0); i < size; i++ { - out[i] = int8(dist.Intn(int(max)-int(min+1))) + min + out[i] = T(dist.IntN(int(max)-int(min+1))) + min } - data := array.NewData(arrow.PrimitiveTypes.Int8, int(size), buffers, nil, int(nullcount), 0) + data := array.NewData(arrow.GetDataType[T](), int(size), buffers, nil, int(nullcount), 0) defer data.Release() - return array.NewInt8Data(data) + return array.MakeFromData(data) } -func (r *RandomArrayGenerator) Uint8(size int64, min, max uint8, prob float64) arrow.Array { - buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint8SizeBytes) - for _, b := range buffers { - defer b.Release() - } - - r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) - out := arrow.Uint8Traits.CastFromBytes(buffers[1].Bytes()) - for i := int64(0); i < size; i++ { - out[i] = uint8(dist.Intn(int(max)-int(min)+1)) + min - } +func (r *RandomArrayGenerator) Int8(size int64, min, max int8, prob float64) arrow.Array { + return genArray(r, size, min, max, prob) +} - data := array.NewData(arrow.PrimitiveTypes.Uint8, int(size), buffers, nil, int(nullcount), 0) - defer data.Release() - return array.NewUint8Data(data) +func (r *RandomArrayGenerator) Uint8(size int64, min, max uint8, prob float64) arrow.Array { + return genArray(r, size, min, max, prob) } func (r *RandomArrayGenerator) Int16(size int64, min, max int16, prob float64) arrow.Array { - buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int16SizeBytes) - for _, b := range buffers { - defer b.Release() - } - - r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) - out := arrow.Int16Traits.CastFromBytes(buffers[1].Bytes()) - for i := int64(0); i < size; i++ { - out[i] = int16(dist.Intn(int(max)-int(min)+1)) + min - } - - data := array.NewData(arrow.PrimitiveTypes.Int16, int(size), buffers, nil, int(nullcount), 0) - defer data.Release() - return array.NewInt16Data(data) + return genArray(r, size, min, max, prob) } func (r *RandomArrayGenerator) Uint16(size int64, min, max uint16, prob float64) arrow.Array { - buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint16SizeBytes) - for _, b := range buffers { - defer b.Release() - } - - r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) - out := arrow.Uint16Traits.CastFromBytes(buffers[1].Bytes()) - for i := int64(0); i < size; i++ { - out[i] = uint16(dist.Intn(int(max)-int(min)+1)) + min - } - - data := array.NewData(arrow.PrimitiveTypes.Uint16, int(size), buffers, nil, int(nullcount), 0) - defer data.Release() - return array.NewUint16Data(data) + return genArray(r, size, min, max, prob) } func (r *RandomArrayGenerator) Int32(size int64, min, max int32, prob float64) arrow.Array { - buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int32SizeBytes) - for _, b := range buffers { - defer b.Release() - } - - r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) - out := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes()) - for i := int64(0); i < size; i++ { - out[i] = int32(dist.Intn(int(max)-int(min)+1)) + min - } - - data := array.NewData(arrow.PrimitiveTypes.Int32, int(size), buffers, nil, int(nullcount), 0) - defer data.Release() - return array.NewInt32Data(data) + return genArray(r, size, min, max, prob) } func (r *RandomArrayGenerator) Uint32(size int64, min, max uint32, prob float64) arrow.Array { - buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint32SizeBytes) - for _, b := range buffers { - defer b.Release() - } - - r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) - out := arrow.Uint32Traits.CastFromBytes(buffers[1].Bytes()) - for i := int64(0); i < size; i++ { - out[i] = uint32(dist.Uint64n(uint64(max)-uint64(min)+1)) + min - } - - data := array.NewData(arrow.PrimitiveTypes.Uint32, int(size), buffers, nil, int(nullcount), 0) - defer data.Release() - return array.NewUint32Data(data) + return genArray(r, size, min, max, prob) } func (r *RandomArrayGenerator) Int64(size int64, min, max int64, prob float64) arrow.Array { @@ -214,7 +149,7 @@ func (r *RandomArrayGenerator) Int64(size int64, min, max int64, prob float64) a } r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Int64Traits.CastFromBytes(buffers[1].Bytes()) if max == math.MaxInt64 && min == math.MinInt64 { for i := int64(0); i < size; i++ { @@ -222,7 +157,7 @@ func (r *RandomArrayGenerator) Int64(size int64, min, max int64, prob float64) a } } else { for i := int64(0); i < size; i++ { - out[i] = dist.Int63n(max-min+1) + min + out[i] = dist.Int64N(max-min+1) + min } } @@ -238,7 +173,7 @@ func (r *RandomArrayGenerator) Uint64(size int64, min, max uint64, prob float64) } r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Uint64Traits.CastFromBytes(buffers[1].Bytes()) if max == math.MaxUint64 { for i := int64(0); i < size; i++ { @@ -246,7 +181,7 @@ func (r *RandomArrayGenerator) Uint64(size int64, min, max uint64, prob float64) } } else { for i := int64(0); i < size; i++ { - out[i] = dist.Uint64n(max-min+1) + min + out[i] = dist.Uint64N(max-min+1) + min } } @@ -262,7 +197,7 @@ func (r *RandomArrayGenerator) Float32(size int64, min, max float32, prob float6 } r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Float32Traits.CastFromBytes(buffers[1].Bytes()) for i := int64(0); i < size; i++ { out[i] = min + dist.Float32()*(max+1-min) @@ -280,7 +215,7 @@ func (r *RandomArrayGenerator) Float64(size int64, min, max float64, prob float6 } r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Float64Traits.CastFromBytes(buffers[1].Bytes()) for i := int64(0); i < size; i++ { out[i] = dist.NormFloat64() + (max - min) @@ -299,13 +234,13 @@ func (r *RandomArrayGenerator) String(size int64, minLength, maxLength int, null defer bldr.Release() r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) buf := make([]byte, 0, maxLength) gen := func(n int32) string { out := buf[:n] for i := range out { - out[i] = uint8(dist.Int31n(int32('z')-int32('A')+1) + int32('A')) + out[i] = uint8(dist.Int32N(int32('z')-int32('A')+1) + int32('A')) } return string(out) } @@ -329,13 +264,13 @@ func (r *RandomArrayGenerator) LargeString(size int64, minLength, maxLength int6 defer bldr.Release() r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) buf := make([]byte, 0, maxLength) gen := func(n int64) string { out := buf[:n] for i := range out { - out[i] = uint8(dist.Int63n(int64('z')-int64('A')+1) + int64('A')) + out[i] = uint8(dist.Int64N(int64('z')-int64('A')+1) + int64('A')) } return string(out) } @@ -363,13 +298,13 @@ func (r *RandomArrayGenerator) generateBinaryView(dt arrow.DataType, size int64, defer bldr.Release() r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) buf := make([]byte, 0, maxLength) gen := func(n int32) string { out := buf[:n] for i := range out { - out[i] = uint8(dist.Int31n(int32('z')-int32('A')+1) + int32('A')) + out[i] = uint8(dist.Int32N(int32('z')-int32('A')+1) + int32('A')) } return string(out) } @@ -422,9 +357,9 @@ func viewOffsetsFromLengthsArray32( sizes := sizesArray.Int32Values() offsets := make([]int32, sizesArray.Len()) - offsetDeltaRand := rand.New(rand.NewSource(seed)) + offsetDeltaRand := rand.New(rand.NewPCG(seed, 0)) sampleOffset := func(offsetBase int32) int32 { - delta := int32(offsetDeltaRand.Int63n(2*int64(avgLength)) - int64(avgLength)) + delta := int32(offsetDeltaRand.Int64N(2*int64(avgLength)) - int64(avgLength)) offset := offsetBase + delta if offset < 0 { return 0 @@ -463,9 +398,9 @@ func viewOffsetsFromLengthsArray64( sizes := sizesArray.Int64Values() offsets := make([]int64, sizesArray.Len()) - offsetDeltaRand := rand.New(rand.NewSource(seed)) + offsetDeltaRand := rand.New(rand.NewPCG(seed, 0)) sampleOffset := func(offsetBase int64) int64 { - delta := int64(offsetDeltaRand.Int63n(2*avgLength) - avgLength) + delta := int64(offsetDeltaRand.Int64N(2*avgLength) - avgLength) offset := offsetBase + delta if offset < 0 { return 0 diff --git a/arrow/ipc/compression.go b/arrow/ipc/compression.go index f74510a..abdf631 100644 --- a/arrow/ipc/compression.go +++ b/arrow/ipc/compression.go @@ -105,7 +105,7 @@ type lz4Decompressor struct { } func (z *lz4Decompressor) Close() { - z.Reader.Reset(nil) + z.Reset(nil) } func getDecompressor(codec flatbuf.CompressionType) decompressor { diff --git a/arrow/scalar/append_test.go b/arrow/scalar/append_test.go index 03ef7fa..b61013d 100644 --- a/arrow/scalar/append_test.go +++ b/arrow/scalar/append_test.go @@ -14,12 +14,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.18 - package scalar_test import ( "math" + "math/rand/v2" "strings" "testing" @@ -32,7 +31,6 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "golang.org/x/exp/constraints" - "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) @@ -42,14 +40,14 @@ type primitiveTypes interface { func draw[T constraints.Integer](n int64, min, max T) []T { const seed = 1337 - gen := rand.New(rand.NewSource(seed)) + gen := rand.New(rand.NewPCG(seed, 0)) normalizedMin := uint64(math.Abs(float64(min))) normalizedMax := uint64(max) + normalizedMin out := make([]T, n) for i := range out { - out[i] = T(gen.Uint64n(normalizedMax) - normalizedMin) + out[i] = T(gen.Uint64N(normalizedMax) - normalizedMin) } return out } @@ -58,7 +56,7 @@ func drawFloat[T float32 | float64](n int64) []T { const seed = 0xdeadbeef d := distuv.Uniform{ Min: -1000.0, Max: 1000.0, - Src: rand.NewSource(seed), + Src: rand.New(rand.NewPCG(seed, 0)), } out := make([]T, n) @@ -70,14 +68,14 @@ func drawFloat[T float32 | float64](n int64) []T { func drawBytes[T string | []byte](n int64, minLen, maxLen int) []T { const seed = 1337 - gen := rand.New(rand.NewSource(seed)) + gen := rand.New(rand.NewPCG(seed, 0)) out := make([]T, n) for i := range out { - l := gen.Intn(maxLen-minLen+1) + minLen + l := gen.IntN(maxLen-minLen+1) + minLen buf := make([]byte, l) for j := range buf { - buf[j] = uint8(gen.Intn(int('z')-int('A')+1) + int('A')) + buf[j] = uint8(gen.IntN(int('z')-int('A')+1) + int('A')) } out[i] = T(buf) } @@ -88,7 +86,7 @@ func randomBools(n int64, pctFalse float64) []bool { const seed = 0 d := distuv.Uniform{ Min: 0.0, Max: 1.0, - Src: rand.NewSource(seed), + Src: rand.NewPCG(seed, 0), } out := make([]bool, n) diff --git a/arrow/scalar/scalar_test.go b/arrow/scalar/scalar_test.go index 01f9668..a3a9766 100644 --- a/arrow/scalar/scalar_test.go +++ b/arrow/scalar/scalar_test.go @@ -882,7 +882,7 @@ func getScalars(mem memory.Allocator) []scalar.Scalar { scalar.NewDecimal256Scalar(decimal256.FromI64(10), &arrow.Decimal256Type{Precision: 16, Scale: 4}), scalar.NewStringScalarFromBuffer(hello), scalar.NewListScalar(int8Arr), - scalar.NewMapScalar(mapArr.List.ListValues()), + scalar.NewMapScalar(mapArr.ListValues()), scalar.NewFixedSizeListScalar(int8Arr), scalar.NewStructScalar([]scalar.Scalar{scalar.NewInt32Scalar(2), scalar.NewInt32Scalar(6)}, arrow.StructOf([]arrow.Field{{Name: "min", Type: arrow.PrimitiveTypes.Int32}, {Name: "max", Type: arrow.PrimitiveTypes.Int32}}...)), diff --git a/arrow/util/protobuf_reflect.go b/arrow/util/protobuf_reflect.go index c7f03ea..6950745 100644 --- a/arrow/util/protobuf_reflect.go +++ b/arrow/util/protobuf_reflect.go @@ -98,14 +98,14 @@ func (pfr *ProtobufFieldReflection) GetDescriptor() protoreflect.FieldDescriptor } func (pfr *ProtobufFieldReflection) name() string { - if pfr.isOneOf() && pfr.schemaOptions.oneOfHandler != OneOfNull { + if pfr.isOneOf() && pfr.oneOfHandler != OneOfNull { return pfr.fieldNameFormatter(string(pfr.descriptor.ContainingOneof().Name())) } return pfr.fieldNameFormatter(string(pfr.descriptor.Name())) } func (pfr *ProtobufFieldReflection) arrowType() arrow.Type { - if pfr.isOneOf() && pfr.schemaOptions.oneOfHandler == OneOfDenseUnion { + if pfr.isOneOf() && pfr.oneOfHandler == OneOfDenseUnion { return arrow.DENSE_UNION } if pfr.isEnum() { @@ -643,7 +643,7 @@ func (msg ProtobufMessageReflection) Record(mem memory.Allocator) arrow.Record { var fieldNames []string for i, f := range msg.fields { f.AppendValueOrNull(recordBuilder.Field(i), mem) - fieldNames = append(fieldNames, f.protobufReflection.name()) + fieldNames = append(fieldNames, f.name()) } var arrays []arrow.Array @@ -752,7 +752,7 @@ func (f ProtobufMessageFieldReflection) AppendValueOrNull(b array.Builder, mem m switch b.Type().ID() { case arrow.STRING: - if f.protobufReflection.isEnum() { + if f.isEnum() { b.(*array.StringBuilder).Append(string(fd.Enum().Values().ByNumber(pv.Enum()).Name())) } else { b.(*array.StringBuilder).Append(pv.String()) @@ -760,7 +760,7 @@ func (f ProtobufMessageFieldReflection) AppendValueOrNull(b array.Builder, mem m case arrow.BINARY: b.(*array.BinaryBuilder).Append(pv.Bytes()) case arrow.INT32: - if f.protobufReflection.isEnum() { + if f.isEnum() { b.(*array.Int32Builder).Append(int32(f.reflectValue().Int())) } else { b.(*array.Int32Builder).Append(int32(pv.Int())) diff --git a/internal/bitutils/bit_block_counter_test.go b/internal/bitutils/bit_block_counter_test.go index 58ac7f8..8ebbbe8 100644 --- a/internal/bitutils/bit_block_counter_test.go +++ b/internal/bitutils/bit_block_counter_test.go @@ -17,13 +17,13 @@ package bitutils_test import ( + "math/rand/v2" "testing" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/bitutils" "github.com/stretchr/testify/assert" - "golang.org/x/exp/rand" ) const kWordSize = 64 @@ -184,7 +184,7 @@ func TestFourWordsRandomData(t *testing.T) { ) buf := make([]byte, nbytes) - r := rand.New(rand.NewSource(0)) + r := &rand.ChaCha8{} r.Read(buf) checkWithOffset := func(offset int64) { diff --git a/internal/bitutils/bitmap_generate.go b/internal/bitutils/bitmap_generate.go index 178751d..19ddb96 100644 --- a/internal/bitutils/bitmap_generate.go +++ b/internal/bitutils/bitmap_generate.go @@ -60,10 +60,10 @@ func GenerateBitsUnrolled(bitmap []byte, start, length int64, g func() bool) { var ( curbyte byte - cur = bitmap[start/8:] - startBitOffset uint64 = uint64(start % 8) - mask = bitutil.BitMask[startBitOffset] - remaining = length + cur = bitmap[start/8:] + startBitOffset = uint64(start % 8) + mask = bitutil.BitMask[startBitOffset] + remaining = length ) if mask != 0x01 { diff --git a/internal/bitutils/bitmap_generate_test.go b/internal/bitutils/bitmap_generate_test.go index 3f3f8b7..b8510a0 100644 --- a/internal/bitutils/bitmap_generate_test.go +++ b/internal/bitutils/bitmap_generate_test.go @@ -17,10 +17,10 @@ package bitutils_test import ( + "math/rand/v2" "testing" "github.com/apache/arrow-go/v18/internal/bitutils" - "golang.org/x/exp/rand" ) const kBufferSize int64 = 1024 * 8 @@ -46,7 +46,7 @@ func runBench(b *testing.B, bitmap []byte, nbits int64, fn func([]byte, int64, i func BenchmarkGenerateBits(b *testing.B) { nbits := kBufferSize * 8 // random bytes - r := rand.New(rand.NewSource(0)) + r := &rand.ChaCha8{} bitmap := make([]byte, kBufferSize) r.Read(bitmap) @@ -58,7 +58,7 @@ func BenchmarkGenerateBits(b *testing.B) { func BenchmarkGenerateBitsUnrolled(b *testing.B) { nbits := kBufferSize * 8 // random bytes - r := rand.New(rand.NewSource(0)) + r := &rand.ChaCha8{} bitmap := make([]byte, kBufferSize) r.Read(bitmap) diff --git a/internal/types/extension_types.go b/internal/types/extension_types.go index 73c92a5..8257059 100644 --- a/internal/types/extension_types.go +++ b/internal/types/extension_types.go @@ -65,7 +65,7 @@ type Parametric1Type struct { func NewParametric1Type(p int32) *Parametric1Type { ret := &Parametric1Type{param: p} - ret.ExtensionBase.Storage = arrow.PrimitiveTypes.Int32 + ret.Storage = arrow.PrimitiveTypes.Int32 return ret } @@ -116,7 +116,7 @@ type Parametric2Type struct { func NewParametric2Type(p int32) *Parametric2Type { ret := &Parametric2Type{param: p} - ret.ExtensionBase.Storage = arrow.PrimitiveTypes.Int32 + ret.Storage = arrow.PrimitiveTypes.Int32 return ret } diff --git a/parquet/encryption_write_config_test.go b/parquet/encryption_write_config_test.go index ff0154a..b2c10e5 100644 --- a/parquet/encryption_write_config_test.go +++ b/parquet/encryption_write_config_test.go @@ -137,9 +137,9 @@ func (en *EncryptionConfigTestSuite) encryptFile(configs *parquet.FileEncryption int64Writer := nextColumn().(*file.Int64ColumnChunkWriter) for i := 0; i < 2*en.rowsPerRG; i++ { var ( - defLevel = [1]int16{1} - repLevel = [1]int16{0} - value int64 = int64(i) * 1000 * 1000 * 1000 * 1000 + defLevel = [1]int16{1} + repLevel = [1]int16{0} + value = int64(i) * 1000 * 1000 * 1000 * 1000 ) if i%2 == 0 { repLevel[0] = 0 diff --git a/parquet/file/column_reader_test.go b/parquet/file/column_reader_test.go index f96156a..575bda4 100644 --- a/parquet/file/column_reader_test.go +++ b/parquet/file/column_reader_test.go @@ -122,10 +122,11 @@ func makePages(version parquet.DataPageVersion, d *schema.Column, npages, lvlsPe } values := reflect.MakeSlice(reflect.SliceOf(typ), nvalues, nvalues) - if enc == parquet.Encodings.Plain { + switch enc { + case parquet.Encodings.Plain: initValues(values) return testutils.PaginatePlain(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.Plain), nvalues, values, defLevels, repLevels - } else if enc == parquet.Encodings.PlainDict || enc == parquet.Encodings.RLEDict { + case parquet.Encodings.PlainDict, parquet.Encodings.RLEDict: initDictValues(values, lvlsPerPage) return testutils.PaginateDict(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.RLEDict), nvalues, values, defLevels, repLevels } @@ -218,10 +219,10 @@ func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) { var ( read int64 = 0 - totalRead int = 0 - batchActual int = 0 + totalRead = 0 + batchActual = 0 batchSize int32 = 8 - batch int = 0 + batch = 0 ) p.Require().NotNil(p.reader) @@ -251,7 +252,7 @@ func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) { totalRead += batch batchActual += int(read) batchSize = int32(utils.Min(1<<24, utils.Max(int(batchSize*2), 4096))) - if batch <= 0 { + if read <= 0 { break } } diff --git a/parquet/file/file_reader_mmap.go b/parquet/file/file_reader_mmap.go index f2c06ea..44e48e9 100644 --- a/parquet/file/file_reader_mmap.go +++ b/parquet/file/file_reader_mmap.go @@ -64,12 +64,12 @@ func (m *mmapAdapter) Seek(offset int64, whence int) (int64, error) { case io.SeekCurrent: newPos = m.pos + offs case io.SeekEnd: - newPos = int64(m.ReaderAt.Len()) + offs + newPos = int64(m.Len()) + offs } if newPos < 0 { return 0, xerrors.New("negative result pos") } - if newPos > int64(m.ReaderAt.Len()) { + if newPos > int64(m.Len()) { return 0, xerrors.New("new position exceeds size of file") } m.pos = newPos diff --git a/parquet/file/level_conversion.go b/parquet/file/level_conversion.go index b0db789..7661288 100644 --- a/parquet/file/level_conversion.go +++ b/parquet/file/level_conversion.go @@ -179,10 +179,8 @@ func defLevelsToBitmapInternal(defLevels []int16, info LevelInfo, out *ValidityB // DefLevelsToBitmap creates a validitybitmap out of the passed in definition levels and info object. func DefLevelsToBitmap(defLevels []int16, info LevelInfo, out *ValidityBitmapInputOutput) { - hasRepeatedParent := false - if info.RepLevel > 0 { - hasRepeatedParent = true - } + hasRepeatedParent := info.RepLevel > 0 + defLevelsToBitmapInternal(defLevels, info, out, hasRepeatedParent) } diff --git a/parquet/file/record_reader.go b/parquet/file/record_reader.go index e2fdcc8..81ec0af 100644 --- a/parquet/file/record_reader.go +++ b/parquet/file/record_reader.go @@ -169,7 +169,7 @@ func (pr *primitiveRecordReader) Release() { } func (pr *primitiveRecordReader) SetPageReader(rdr PageReader) { - pr.ColumnChunkReader.setPageReader(rdr) + pr.setPageReader(rdr) } func (pr *primitiveRecordReader) ReleaseValidBits() *memory.Buffer { @@ -444,7 +444,7 @@ func (rr *recordReader) reserveLevels(extra int64) error { } func (rr *recordReader) reserveValues(extra int64) error { - return rr.recordReaderImpl.ReserveValues(extra, rr.leafInfo.HasNullableValues()) + return rr.ReserveValues(extra, rr.leafInfo.HasNullableValues()) } func (rr *recordReader) ResetValues() { diff --git a/parquet/internal/encoding/decoder.go b/parquet/internal/encoding/decoder.go index 64455d1..b2a012e 100644 --- a/parquet/internal/encoding/decoder.go +++ b/parquet/internal/encoding/decoder.go @@ -214,7 +214,7 @@ func spacedExpand(buffer interface{}, nullCount int, validBits []byte, validBits } var ( - numValues int = bufferRef.Len() + numValues = bufferRef.Len() ) idxDecode := int64(numValues - nullCount) diff --git a/parquet/internal/testutils/random.go b/parquet/internal/testutils/random.go index 5044b74..a0fc949 100644 --- a/parquet/internal/testutils/random.go +++ b/parquet/internal/testutils/random.go @@ -21,6 +21,7 @@ package testutils import ( "encoding/binary" "math" + "math/rand/v2" "time" "unsafe" @@ -33,7 +34,6 @@ import ( "github.com/apache/arrow-go/v18/parquet" "github.com/apache/arrow-go/v18/parquet/pqarrow" - "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) @@ -48,7 +48,7 @@ type RandomArrayGenerator struct { // NewRandomArrayGenerator constructs a new generator with the requested Seed func NewRandomArrayGenerator(seed uint64) RandomArrayGenerator { - src := rand.NewSource(seed) + src := rand.NewPCG(seed, 0) return RandomArrayGenerator{seed, 0, src, rand.New(src)} } @@ -62,7 +62,7 @@ func (r *RandomArrayGenerator) GenerateBitmap(buffer []byte, n int64, prob float // bernoulli distribution uses P to determine the probability of a 0 or a 1, // which we'll use to generate the bitmap. - dist := distuv.Bernoulli{P: prob, Src: rand.NewSource(r.seed + r.extra)} + dist := distuv.Bernoulli{P: prob, Src: rand.NewPCG(r.seed, r.extra)} for i := 0; int64(i) < n; i++ { if dist.Rand() != float64(0.0) { bitutil.SetBit(buffer, i) @@ -88,7 +88,7 @@ func (r *RandomArrayGenerator) ByteArray(size int64, minLen, maxLen int32, nullP defer lengths.Release() r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) bldr := array.NewStringBuilder(memory.DefaultAllocator) defer bldr.Release() @@ -98,7 +98,7 @@ func (r *RandomArrayGenerator) ByteArray(size int64, minLen, maxLen int32, nullP if lengths.IsValid(i) { l := lengths.Value(i) for j := int32(0); j < l; j++ { - strbuf[j] = byte(dist.Int31n(int32('z')-int32('A')+1) + int32('A')) + strbuf[j] = byte(dist.Int32N(int32('z')-int32('A')+1) + int32('A')) } val := strbuf[:l] bldr.Append(*(*string)(unsafe.Pointer(&val))) @@ -124,10 +124,10 @@ func (r *RandomArrayGenerator) Uint8(size int64, min, max uint8, prob float64) a buffers[1].Resize(int(size * int64(arrow.Uint8SizeBytes))) r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Uint8Traits.CastFromBytes(buffers[1].Bytes()) for i := int64(0); i < size; i++ { - out[i] = uint8(dist.Intn(int(max-min+1))) + min + out[i] = uint8(dist.IntN(int(max-min+1))) + min } return array.NewUint8Data(array.NewData(arrow.PrimitiveTypes.Uint8, int(size), buffers, nil, int(nullCount), 0)) @@ -147,10 +147,10 @@ func (r *RandomArrayGenerator) Int32(size int64, min, max int32, pctNull float64 buffers[1].Resize(arrow.Int32Traits.BytesRequired(int(size))) r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes()) for i := int64(0); i < size; i++ { - out[i] = dist.Int31n(max-min+1) + min + out[i] = dist.Int32N(max-min+1) + min } return array.NewInt32Data(array.NewData(arrow.PrimitiveTypes.Int32, int(size), buffers, nil, int(nullCount), 0)) } @@ -169,10 +169,10 @@ func (r *RandomArrayGenerator) Int64(size int64, min, max int64, pctNull float64 buffers[1].Resize(arrow.Int64Traits.BytesRequired(int(size))) r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Int64Traits.CastFromBytes(buffers[1].Bytes()) for i := int64(0); i < size; i++ { - out[i] = dist.Int63n(max-min+1) + min + out[i] = dist.Int64N(max-min+1) + min } return array.NewInt64Data(array.NewData(arrow.PrimitiveTypes.Int64, int(size), buffers, nil, int(nullCount), 0)) } @@ -191,7 +191,7 @@ func (r *RandomArrayGenerator) Float64(size int64, pctNull float64) *array.Float buffers[1].Resize(arrow.Float64Traits.BytesRequired(int(size))) r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Float64Traits.CastFromBytes(buffers[1].Bytes()) for i := int64(0); i < size; i++ { out[i] = dist.NormFloat64() @@ -231,43 +231,43 @@ func (r *RandomArrayGenerator) StringWithRepeats(mem memory.Allocator, sz, uniqu // FillRandomInt8 populates the slice out with random int8 values between min and max using // seed as the random see for generation to allow consistency for testing. func FillRandomInt8(seed uint64, min, max int8, out []int8) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = int8(r.Intn(int(max-min+1))) + min + out[idx] = int8(r.IntN(int(max-min+1))) + min } } // FillRandomUint8 populates the slice out with random uint8 values between min and max using // seed as the random see for generation to allow consistency for testing. func FillRandomUint8(seed uint64, min, max uint8, out []uint8) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = uint8(r.Intn(int(max-min+1))) + min + out[idx] = uint8(r.IntN(int(max-min+1))) + min } } // FillRandomInt16 populates the slice out with random int16 values between min and max using // seed as the random see for generation to allow consistency for testing. func FillRandomInt16(seed uint64, min, max int16, out []int16) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = int16(r.Intn(int(max-min+1))) + min + out[idx] = int16(r.IntN(int(max-min+1))) + min } } // FillRandomUint16 populates the slice out with random uint16 values between min and max using // seed as the random see for generation to allow consistency for testing. func FillRandomUint16(seed uint64, min, max uint16, out []uint16) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = uint16(r.Intn(int(max-min+1))) + min + out[idx] = uint16(r.IntN(int(max-min+1))) + min } } // FillRandomInt32 populates out with random int32 values using seed as the random // seed for the generator to allow consistency for testing. func FillRandomInt32(seed uint64, out []int32) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = int32(r.Uint32()) } @@ -276,34 +276,34 @@ func FillRandomInt32(seed uint64, out []int32) { // FillRandomInt32Max populates out with random int32 values between 0 and max using seed as the random // seed for the generator to allow consistency for testing. func FillRandomInt32Max(seed uint64, max int32, out []int32) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = r.Int31n(max) + out[idx] = r.Int32N(max) } } // FillRandomUint32Max populates out with random uint32 values between 0 and max using seed as the random // seed for the generator to allow consistency for testing. func FillRandomUint32Max(seed uint64, max uint32, out []uint32) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = uint32(r.Uint64n(uint64(max))) + out[idx] = uint32(r.Uint64N(uint64(max))) } } // FillRandomInt64Max populates out with random int64 values between 0 and max using seed as the random // seed for the generator to allow consistency for testing. func FillRandomInt64Max(seed uint64, max int64, out []int64) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = r.Int63n(max) + out[idx] = r.Int64N(max) } } // FillRandomUint32 populates out with random uint32 values using seed as the random // seed for the generator to allow consistency for testing. func FillRandomUint32(seed uint64, out []uint32) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = r.Uint32() } @@ -312,7 +312,7 @@ func FillRandomUint32(seed uint64, out []uint32) { // FillRandomUint64 populates out with random uint64 values using seed as the random // seed for the generator to allow consistency for testing. func FillRandomUint64(seed uint64, out []uint64) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = r.Uint64() } @@ -321,16 +321,16 @@ func FillRandomUint64(seed uint64, out []uint64) { // FillRandomUint64Max populates out with random uint64 values between 0 and max using seed as the random // seed for the generator to allow consistency for testing. func FillRandomUint64Max(seed uint64, max uint64, out []uint64) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { - out[idx] = r.Uint64n(max) + out[idx] = r.Uint64N(max) } } // FillRandomInt64 populates out with random int64 values using seed as the random // seed for the generator to allow consistency for testing. func FillRandomInt64(seed uint64, out []int64) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = int64(r.Uint64()) } @@ -340,7 +340,7 @@ func FillRandomInt64(seed uint64, out []int64) { // seed for the generator to allow consistency for testing. It does this by generating // three random uint32 values for each int96 value. func FillRandomInt96(seed uint64, out []parquet.Int96) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { *(*int32)(unsafe.Pointer(&out[idx][0])) = int32(r.Uint32()) *(*int32)(unsafe.Pointer(&out[idx][4])) = int32(r.Uint32()) @@ -374,7 +374,7 @@ func randFloat64(r *rand.Rand) float64 { // to better spread the values out and ensure we do not return any NaN or Inf values. func randFloat16(r *rand.Rand) float16.Num { for { - f := float16.FromBits(uint16(r.Uint64n(math.MaxUint16 + 1))) + f := float16.FromBits(uint16(r.Uint64N(math.MaxUint16 + 1))) if !f.IsNaN() { return f } @@ -384,7 +384,7 @@ func randFloat16(r *rand.Rand) float16.Num { // FillRandomFloat32 populates out with random float32 values using seed as the random // seed for the generator to allow consistency for testing. func FillRandomFloat32(seed uint64, out []float32) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = randFloat32(r) } @@ -393,7 +393,7 @@ func FillRandomFloat32(seed uint64, out []float32) { // FillRandomFloat64 populates out with random float64 values using seed as the random // seed for the generator to allow consistency for testing. func FillRandomFloat64(seed uint64, out []float64) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = randFloat64(r) } @@ -402,7 +402,7 @@ func FillRandomFloat64(seed uint64, out []float64) { // FillRandomFloat16 populates out with random float64 values using seed as the random // seed for the generator to allow consistency for testing. func FillRandomFloat16(seed uint64, out []float16.Num) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = randFloat16(r) } @@ -425,8 +425,11 @@ func FillRandomByteArray(seed uint64, out []parquet.ByteArray, heap *memory.Buff func FillRandomFixedByteArray(seed uint64, out []parquet.FixedLenByteArray, heap *memory.Buffer, size int) { heap.Resize(len(out) * size) + var seedBytes [32]byte + binary.LittleEndian.PutUint64(seedBytes[:8], seed) + buf := heap.Bytes() - r := rand.New(rand.NewSource(seed)) + r := rand.NewChaCha8(seedBytes) for idx := range out { r.Read(buf[:size]) out[idx] = buf[:size] @@ -438,7 +441,7 @@ func FillRandomFixedByteArray(seed uint64, out []parquet.FixedLenByteArray, heap // seed as the random seed to the generator in order to allow consistency for testing. This uses // a Bernoulli distribution of values. func FillRandomBooleans(p float64, seed uint64, out []bool) { - dist := distuv.Bernoulli{P: p, Src: rand.NewSource(seed)} + dist := distuv.Bernoulli{P: p, Src: rand.NewPCG(seed, 0)} for idx := range out { out[idx] = dist.Rand() != float64(0.0) } @@ -449,7 +452,7 @@ func FillRandomBooleans(p float64, seed uint64, out []bool) { // the default Golang random generator distribution of float64 values between 0 and 1 comparing against // pctNull. If the random value is > pctNull, it is true. func fillRandomIsValid(seed uint64, pctNull float64, out []bool) { - r := rand.New(rand.NewSource(seed)) + r := rand.New(rand.NewPCG(seed, 0)) for idx := range out { out[idx] = r.Float64() > pctNull } @@ -494,11 +497,15 @@ func InitValues(values interface{}, heap *memory.Buffer) { func RandomByteArray(seed uint64, out []parquet.ByteArray, heap *memory.Buffer, minlen, maxlen int) { heap.Resize(len(out) * (maxlen + arrow.Uint32SizeBytes)) + var seedBytes [32]byte + binary.LittleEndian.PutUint64(seedBytes[:8], seed) + buf := heap.Bytes() - r := rand.New(rand.NewSource(seed)) + src := rand.NewChaCha8(seedBytes) + r := rand.New(src) for idx := range out { - length := r.Intn(maxlen-minlen+1) + minlen - r.Read(buf[:length]) + length := r.IntN(maxlen-minlen+1) + minlen + src.Read(buf[:length]) out[idx] = buf[:length] buf = buf[length:] @@ -509,7 +516,10 @@ func RandomByteArray(seed uint64, out []parquet.ByteArray, heap *memory.Buffer, // for the values and seed as the random generator seed to allow consistency for testing. The // resulting values will be either 32 bytes or 16 bytes each depending on the precision. func RandomDecimals(n int64, seed uint64, precision int32) []byte { - r := rand.New(rand.NewSource(seed)) + var seedBytes [32]byte + binary.LittleEndian.PutUint64(seedBytes[:8], seed) + + r := rand.NewChaCha8(seedBytes) nreqBytes := pqarrow.DecimalSize(precision) byteWidth := 32 if precision <= 38 { diff --git a/parquet/internal/testutils/random_arrow.go b/parquet/internal/testutils/random_arrow.go index 6fa10d3..01ea08b 100644 --- a/parquet/internal/testutils/random_arrow.go +++ b/parquet/internal/testutils/random_arrow.go @@ -17,11 +17,12 @@ package testutils import ( + "math/rand/v2" + "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/float16" "github.com/apache/arrow-go/v18/arrow/memory" - "golang.org/x/exp/rand" ) // RandomNonNull generates a random arrow array of the requested type with length size with no nulls. @@ -156,10 +157,11 @@ func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Arra defer bldr.Release() buf := make([]byte, 12) - r := rand.New(rand.NewSource(0)) + src := &rand.ChaCha8{} + r := rand.New(src) for i := 0; i < size; i++ { - length := r.Intn(12-2+1) + 2 - r.Read(buf[:length]) + length := r.IntN(12-2+1) + 2 + src.Read(buf[:length]) bldr.Append(buf[:length]) } return bldr.NewArray() @@ -168,7 +170,7 @@ func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Arra defer bldr.Release() buf := make([]byte, 10) - r := rand.New(rand.NewSource(0)) + r := &rand.ChaCha8{} for i := 0; i < size; i++ { r.Read(buf) bldr.Append(buf) @@ -416,15 +418,16 @@ func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array { } buf := make([]byte, 12) - r := rand.New(rand.NewSource(0)) + src := &rand.ChaCha8{} + r := rand.New(src) for i := 0; i < size; i++ { if !valid[i] { bldr.AppendNull() continue } - length := r.Intn(12-2+1) + 2 - r.Read(buf[:length]) + length := r.IntN(12-2+1) + 2 + src.Read(buf[:length]) bldr.Append(buf[:length]) } return bldr.NewArray() @@ -441,15 +444,16 @@ func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array { } buf := make([]byte, 12) - r := rand.New(rand.NewSource(0)) + src := &rand.ChaCha8{} + r := rand.New(src) for i := 0; i < size; i++ { if !valid[i] { bldr.AppendNull() continue } - length := r.Intn(12-2+1) + 2 - r.Read(buf[:length]) + length := r.IntN(12-2+1) + 2 + src.Read(buf[:length]) // trivially force data to be valid UTF8 by making it all ASCII for idx := range buf[:length] { buf[idx] &= 0x7f @@ -470,7 +474,7 @@ func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array { } buf := make([]byte, 10) - r := rand.New(rand.NewSource(0)) + r := &rand.ChaCha8{} for i := 0; i < size; i++ { if !valid[i] { bldr.AppendNull() diff --git a/parquet/internal/utils/bit_reader_test.go b/parquet/internal/utils/bit_reader_test.go index 3987cd5..366ae40 100644 --- a/parquet/internal/utils/bit_reader_test.go +++ b/parquet/internal/utils/bit_reader_test.go @@ -22,6 +22,7 @@ import ( "fmt" "math" "math/bits" + "math/rand/v2" "strconv" "testing" @@ -32,7 +33,6 @@ import ( "github.com/apache/arrow-go/v18/parquet/internal/utils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" - "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) @@ -550,15 +550,15 @@ func (r *RLERandomSuite) TestRandomSequences() { const maxgroup = 16 values := make([]uint64, ngroups+maxgroup) - seed := rand.Uint64() ^ (rand.Uint64() << 32) - gen := rand.New(rand.NewSource(seed)) + seed1, seed2 := rand.Uint64(), rand.Uint64()<<32 + gen := rand.New(rand.NewPCG(seed1, seed2)) for itr := 0; itr < niters; itr++ { parity := false values = values[:0] for i := 0; i < ngroups; i++ { - groupsize := gen.Intn(19) + 1 + groupsize := gen.IntN(19) + 1 if groupsize > maxgroup { groupsize = 1 } @@ -572,7 +572,7 @@ func (r *RLERandomSuite) TestRandomSequences() { } parity = !parity } - r.Require().Truef(r.checkRoundTrip(values, bits.Len(uint(len(values)))), "failing seed: %d", seed) + r.Require().Truef(r.checkRoundTrip(values, bits.Len(uint(len(values)))), "failing seed: %d,%d", seed1, seed2) } } @@ -584,7 +584,7 @@ type RandomArrayGenerator struct { } func NewRandomArrayGenerator(seed uint64) RandomArrayGenerator { - src := rand.NewSource(seed) + src := rand.NewPCG(seed, 0) return RandomArrayGenerator{seed, 0, src, rand.New(src)} } @@ -592,7 +592,7 @@ func (r *RandomArrayGenerator) generateBitmap(buffer []byte, n int64, prob float count := int64(0) r.extra++ - dist := distuv.Bernoulli{P: prob, Src: rand.NewSource(r.seed + r.extra)} + dist := distuv.Bernoulli{P: prob, Src: rand.NewPCG(r.seed, r.extra)} for i := int(0); int64(i) < n; i++ { if dist.Rand() != float64(0.0) { bitutil.SetBit(buffer, i) @@ -616,10 +616,10 @@ func (r *RandomArrayGenerator) Int32(size int64, min, max int32, prob float64) a buffers[1].Resize(int(size * int64(arrow.Int32SizeBytes))) r.extra++ - dist := rand.New(rand.NewSource(r.seed + r.extra)) + dist := rand.New(rand.NewPCG(r.seed, r.extra)) out := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes()) for i := int64(0); i < size; i++ { - out[i] = int32(dist.Int31n(max-min+1)) + min + out[i] = int32(dist.Int32N(max-min+1)) + min } return array.NewInt32Data(array.NewData(arrow.PrimitiveTypes.Int32, int(size), buffers, nil, int(nullCount), 0)) diff --git a/parquet/metadata/page_index.go b/parquet/metadata/page_index.go index 020ab4c..7e0b3e4 100644 --- a/parquet/metadata/page_index.go +++ b/parquet/metadata/page_index.go @@ -611,9 +611,10 @@ func newColumnIndexBuilder[T parquet.ColumnTypes](descr *schema.Column) *columnI } func (b *columnIndexBuilder[T]) AddPage(stats *EncodedStatistics) error { - if b.state == stateFinished { + switch b.state { + case stateFinished: return fmt.Errorf("%w: cannot add page to finished ColumnIndexBuilder", arrow.ErrInvalid) - } else if b.state == stateDiscarded { + case stateDiscarded: return nil } @@ -750,9 +751,10 @@ func (o *OffsetIndexBuilder) AddPageLoc(pgloc PageLocation) error { } func (o *OffsetIndexBuilder) AddPage(offset, firstRowIdx int64, compressedPgSize int32) error { - if o.state == stateFinished { + switch o.state { + case stateFinished: return fmt.Errorf("%w: cannot add page to finished OffsetIndexBuilder", arrow.ErrInvalid) - } else if o.state == stateDiscarded { + case stateDiscarded: // offset index is discarded, do nothing return nil } diff --git a/parquet/pqarrow/path_builder.go b/parquet/pqarrow/path_builder.go index 7abd0f5..784a2c2 100644 --- a/parquet/pqarrow/path_builder.go +++ b/parquet/pqarrow/path_builder.go @@ -251,7 +251,7 @@ type nullableNode struct { } func (n *nullableNode) clone() pathNode { - var ret nullableNode = *n + var ret = *n return &ret } @@ -632,7 +632,7 @@ type int16BufferBuilder struct { } func (b *int16BufferBuilder) Values() []int16 { - return arrow.Int16Traits.CastFromBytes(b.PooledBufferWriter.Bytes()) + return arrow.Int16Traits.CastFromBytes(b.Bytes()) } func (b *int16BufferBuilder) Value(i int) int16 { @@ -651,12 +651,12 @@ func (b *int16BufferBuilder) AppendCopies(count int, val int16) { } func (b *int16BufferBuilder) UnsafeAppend(v int16) { - b.PooledBufferWriter.UnsafeWrite((*(*[2]byte)(unsafe.Pointer(&v)))[:]) + b.UnsafeWrite((*(*[2]byte)(unsafe.Pointer(&v)))[:]) } func (b *int16BufferBuilder) Append(v int16) { b.PooledBufferWriter.Reserve(arrow.Int16SizeBytes) - b.PooledBufferWriter.Write((*(*[2]byte)(unsafe.Pointer(&v)))[:]) + b.Write((*(*[2]byte)(unsafe.Pointer(&v)))[:]) } func fillRepLevels(count int, repLvl int16, ctx *pathWriteCtx) { diff --git a/parquet/pqarrow/reader_writer_test.go b/parquet/pqarrow/reader_writer_test.go index d00b5d2..f346876 100644 --- a/parquet/pqarrow/reader_writer_test.go +++ b/parquet/pqarrow/reader_writer_test.go @@ -21,6 +21,7 @@ import ( "context" "fmt" "math" + "math/rand/v2" "testing" "unsafe" @@ -32,7 +33,6 @@ import ( "github.com/apache/arrow-go/v18/parquet/file" "github.com/apache/arrow-go/v18/parquet/pqarrow" "github.com/stretchr/testify/require" - "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) @@ -50,7 +50,7 @@ func randomUint8(size, truePct int, sampleVals [2]uint8, seed uint64) []uint8 { dist := distuv.Bernoulli{ P: float64(truePct) / 100.0, - Src: rand.NewSource(seed), + Src: rand.NewPCG(seed, 0), } for idx := range ret { @@ -70,7 +70,7 @@ func randomInt32(size, truePct int, sampleVals [2]int32, seed uint64) []int32 { dist := distuv.Bernoulli{ P: float64(truePct) / 100.0, - Src: rand.NewSource(seed), + Src: rand.NewPCG(seed, 0), } for idx := range ret { diff --git a/parquet/schema/reflection.go b/parquet/schema/reflection.go index c5e21b0..6082c29 100644 --- a/parquet/schema/reflection.go +++ b/parquet/schema/reflection.go @@ -242,7 +242,7 @@ func infoFromTags(f reflect.StructTag) *taggedInfo { info.Exclude = true return &info } - for _, tag := range strings.Split(strings.Replace(ptags, "\t", "", -1), ",") { + for _, tag := range strings.Split(strings.ReplaceAll(ptags, "\t", ""), ",") { tag = strings.TrimSpace(tag) kv := strings.SplitN(tag, "=", 2) key := strings.TrimSpace(strings.ToLower(kv[0])) @@ -488,9 +488,10 @@ func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info bitwidth := int8(typ.Bits()) if physical != parquet.Types.Undefined { - if ptyp == parquet.Types.Int32 { + switch ptyp { + case parquet.Types.Int32: bitwidth = 32 - } else if ptyp == parquet.Types.Int64 { + case parquet.Types.Int64: bitwidth = 64 } } @@ -517,9 +518,10 @@ func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info bitwidth := int8(typ.Bits()) if physical != parquet.Types.Undefined { - if ptyp == parquet.Types.Int32 { + switch ptyp { + case parquet.Types.Int32: bitwidth = 32 - } else if ptyp == parquet.Types.Int64 { + case parquet.Types.Int64: bitwidth = 64 } }