This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new d151373 ARROW-14206: [Go][Parquet] Clean up s390x and arm build code
d151373 is described below
commit d151373f0963732955ce15a87568f462acef6aef
Author: Matthew Topol <[email protected]>
AuthorDate: Mon Oct 4 13:44:03 2021 -0400
ARROW-14206: [Go][Parquet] Clean up s390x and arm build code
Closes #11305 from zeroshade/arrow-14206-go-fix-builds
Lead-authored-by: Matthew Topol <[email protected]>
Co-authored-by: Matt Topol <[email protected]>
Signed-off-by: Matthew Topol <[email protected]>
---
go/arrow/bitutil/bitmaps.go | 28 ++-----
go/arrow/bitutil/endian_default.go | 33 ++++++++
go/arrow/bitutil/endian_s390x.go | 32 ++++++++
go/arrow/cdata/test/go.sum | 55 +++++++++++++
go/parquet/internal/encoding/byte_array_encoder.go | 2 +-
go/parquet/internal/encoding/encoder.go | 13 +---
go/parquet/internal/encoding/levels_test.go | 3 +-
go/parquet/internal/encoding/memo_table.go | 19 +++++
.../internal/encoding/memo_table_types.gen.go | 32 ++++++++
.../internal/encoding/memo_table_types.gen.go.tmpl | 8 ++
go/parquet/internal/encoding/typed_encoder.gen.go | 20 -----
.../internal/encoding/typed_encoder.gen.go.tmpl | 5 --
go/parquet/internal/hashing/xxh3_memo_table.gen.go | 90 ++++++++++++++++++++++
.../internal/hashing/xxh3_memo_table.gen.go.tmpl | 23 ++++++
go/parquet/internal/hashing/xxh3_memo_table.go | 8 ++
go/parquet/internal/utils/bit_block_counter.go | 13 +---
go/parquet/internal/utils/bit_run_reader.go | 2 +-
go/parquet/internal/utils/endians_default.go | 28 +++++++
go/parquet/internal/utils/endians_s390x.go | 31 ++++++++
go/parquet/metadata/statistics.go | 17 ++--
20 files changed, 381 insertions(+), 81 deletions(-)
diff --git a/go/arrow/bitutil/bitmaps.go b/go/arrow/bitutil/bitmaps.go
index 4f72ae6..572d4d0 100644
--- a/go/arrow/bitutil/bitmaps.go
+++ b/go/arrow/bitutil/bitmaps.go
@@ -24,24 +24,6 @@ import (
"github.com/apache/arrow/go/arrow/internal/debug"
)
-// helper function to handle big-endian architectures properly
-var toFromLEFunc func(uint64) uint64
-var byteZero int
-
-func init() {
- if endian.IsBigEndian {
- // if we're on a big endian architecture, then use the reverse bytes
- // function so we can perform byte-swaps when necessary
- toFromLEFunc = bits.ReverseBytes64
-
- byteZero = 7
- } else {
- // identity function if we're on a little endian architecture
- toFromLEFunc = func(in uint64) uint64 { return in }
- byteZero = 0
- }
-}
-
// BitmapReader is a simple bitmap reader for a byte slice.
type BitmapReader struct {
bitmap []byte
@@ -221,7 +203,7 @@ func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader {
if bm.nwords > 0 {
bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap))
} else {
- (*[8]byte)(unsafe.Pointer(&bm.curword))[byteZero] = bm.bitmap[0]
+ setLSB(&bm.curword, bm.bitmap[0])
}
return bm
}
@@ -280,12 +262,12 @@ func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) {
bm.bitmap = bm.bitmap[1:]
nextByte := bm.bitmap[0]
- val = (*[8]byte)(unsafe.Pointer(&bm.curword))[byteZero]
+ val = getLSB(bm.curword)
if bm.offset != 0 {
val >>= byte(bm.offset)
val |= nextByte << (8 - bm.offset)
}
- (*[8]byte)(unsafe.Pointer(&bm.curword))[byteZero] = nextByte
+ setLSB(&bm.curword, nextByte)
bm.trailingBits -= 8
bm.trailingBytes--
validBits = 8
@@ -321,7 +303,7 @@ func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter {
if ret.len >= int(unsafe.Sizeof(uint64(0))*8) {
ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap))
} else if ret.len > 0 {
- (*[8]byte)(unsafe.Pointer(&ret.currentWord))[byteZero] = ret.bitmap[0]
+ setLSB(&ret.currentWord, ret.bitmap[0])
}
}
return ret
@@ -359,7 +341,7 @@ func (bm *BitmapWordWriter) PutNextWord(word uint64) {
// PutNextTrailingByte writes the number of bits indicated by validBits from b to
// the bitmap.
func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) {
- curbyte := (*[8]byte)(unsafe.Pointer(&bm.currentWord))[byteZero]
+ curbyte := getLSB(bm.currentWord)
if validBits == 8 {
if bm.offset != 0 {
b = (b << bm.offset) | (b >> (8 - bm.offset))
diff --git a/go/arrow/bitutil/endian_default.go b/go/arrow/bitutil/endian_default.go
new file mode 100644
index 0000000..9f5d3cd
--- /dev/null
+++ b/go/arrow/bitutil/endian_default.go
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !s390x
+
+package bitutil
+
+import (
+ "unsafe"
+)
+
+var toFromLEFunc = func(in uint64) uint64 { return in }
+
+func getLSB(v uint64) byte {
+ return (*[8]byte)(unsafe.Pointer(&v))[0]
+}
+
+func setLSB(v *uint64, b byte) {
+ (*[8]byte)(unsafe.Pointer(v))[0] = b
+}
diff --git a/go/arrow/bitutil/endian_s390x.go b/go/arrow/bitutil/endian_s390x.go
new file mode 100644
index 0000000..a9bba43
--- /dev/null
+++ b/go/arrow/bitutil/endian_s390x.go
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package bitutil
+
+import (
+ "math/bits"
+ "unsafe"
+)
+
+var toFromLEFunc = bits.ReverseBytes64
+
+func getLSB(v uint64) byte {
+ return (*[8]byte)(unsafe.Pointer(&v))[7]
+}
+
+func setLSB(v *uint64, b byte) {
+ (*[8]byte)(unsafe.Pointer(v))[7] = b
+}
diff --git a/go/arrow/cdata/test/go.sum b/go/arrow/cdata/test/go.sum
index 9cbf776..033f191 100644
--- a/go/arrow/cdata/test/go.sum
+++ b/go/arrow/cdata/test/go.sum
@@ -1,7 +1,12 @@
cloud.google.com/go v0.26.0/go.mod
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod
h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
+gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod
h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
github.com/BurntSushi/toml v0.3.1/go.mod
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod
h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod
h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/antihax/optional v1.0.0/go.mod
h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
+github.com/boombuler/barcode v1.0.0/go.mod
h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod
h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod
h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
@@ -17,7 +22,16 @@ github.com/envoyproxy/go-control-plane
v0.9.9-0.20201210154907-fd9021fe5dad/go.m
github.com/envoyproxy/go-control-plane
v0.9.9-0.20210217033140-668b12f5399d/go.mod
h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/go-control-plane
v0.9.9-0.20210512163311-63b5d3c536b0/go.mod
h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod
h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
+github.com/fogleman/gg v1.3.0/go.mod
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
github.com/ghodss/yaml v1.0.0/go.mod
h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/go-fonts/dejavu v0.1.0/go.mod
h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
+github.com/go-fonts/latin-modern v0.2.0/go.mod
h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
+github.com/go-fonts/liberation v0.1.1/go.mod
h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
+github.com/go-fonts/stix v0.1.0/go.mod
h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
+github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod
h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
+github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod
h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod
h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod
h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod
h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -46,28 +60,54 @@ github.com/google/go-cmp v0.5.5/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.6/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/uuid v1.1.2/go.mod
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod
h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
+github.com/jung-kurt/gofpdf v1.0.0/go.mod
h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
+github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod
h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/klauspost/compress v1.13.1
h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
github.com/klauspost/compress v1.13.1/go.mod
h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/phpdave11/gofpdf v1.4.2/go.mod
h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY=
+github.com/phpdave11/gofpdi v1.0.12/go.mod
h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
github.com/pierrec/lz4/v4 v4.1.8
h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4=
github.com/pierrec/lz4/v4 v4.1.8/go.mod
h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/pkg/errors v0.8.1/go.mod
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1/go.mod
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/fastuuid v1.2.0/go.mod
h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
+github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod
h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
github.com/stretchr/objx v0.1.0/go.mod
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod
h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0
h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/yuin/goldmark v1.3.5/go.mod
h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.opentelemetry.io/proto/otlp v0.7.0/go.mod
h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod
h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3
h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod
h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
+golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod
h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod
h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod
h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod
h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod
h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -89,9 +129,11 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod
h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -101,11 +143,14 @@ golang.org/x/text v0.3.0/go.mod
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod
h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod
h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod
h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.1.4/go.mod
h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
@@ -114,6 +159,14 @@ golang.org/x/xerrors
v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod
h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
+gonum.org/v1/gonum v0.8.2/go.mod
h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
+gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s=
+gonum.org/v1/gonum v0.9.3/go.mod
h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0
h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod
h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
+gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod
h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
+gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
google.golang.org/appengine v1.1.0/go.mod
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
@@ -141,6 +194,7 @@ google.golang.org/protobuf v1.25.0/go.mod
h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba
google.golang.org/protobuf v1.26.0-rc.1/go.mod
h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod
h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.27.1/go.mod
h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405
h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
@@ -148,3 +202,4 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c
h1:dUUwHk2QECo/6vqA44rthZ8ie
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/go/parquet/internal/encoding/byte_array_encoder.go
b/go/parquet/internal/encoding/byte_array_encoder.go
index 8d46c6f..3015bf8 100644
--- a/go/parquet/internal/encoding/byte_array_encoder.go
+++ b/go/parquet/internal/encoding/byte_array_encoder.go
@@ -37,7 +37,7 @@ type PlainByteArrayEncoder struct {
func (enc *PlainByteArrayEncoder) PutByteArray(val parquet.ByteArray) {
inc := val.Len() + arrow.Uint32SizeBytes
enc.sink.Reserve(inc)
- vlen := toLEFunc(uint32(val.Len()))
+ vlen := utils.ToLEUint32(uint32(val.Len()))
enc.sink.UnsafeWrite((*(*[4]byte)(unsafe.Pointer(&vlen)))[:])
enc.sink.UnsafeWrite(val)
}
diff --git a/go/parquet/internal/encoding/encoder.go
b/go/parquet/internal/encoding/encoder.go
index 2f9a40c..4121a11 100644
--- a/go/parquet/internal/encoding/encoder.go
+++ b/go/parquet/internal/encoding/encoder.go
@@ -22,7 +22,6 @@ import (
"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/bitutil"
- "github.com/apache/arrow/go/arrow/endian"
"github.com/apache/arrow/go/arrow/memory"
"github.com/apache/arrow/go/parquet"
format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
@@ -30,16 +29,6 @@ import (
"github.com/apache/arrow/go/parquet/schema"
)
-var toLEFunc func(uint32) uint32
-
-func init() {
- if endian.IsBigEndian {
- toLEFunc = bits.ReverseBytes32
- } else {
- toLEFunc = func(in uint32) uint32 { return in }
- }
-}
-
//go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata plain_encoder_types.gen.go.tmpl typed_encoder.gen.go.tmpl
// EncoderTraits is an interface for the different types to make it more
@@ -186,7 +175,7 @@ func (d *dictEncoder) BitWidth() int {
// WriteDict writes the dictionary index to the given byte slice.
func (d *dictEncoder) WriteDict(out []byte) {
- d.memo.CopyValues(out)
+ d.memo.WriteOut(out)
}
// WriteIndices performs Run Length encoding on the indexes and the writes the encoded
diff --git a/go/parquet/internal/encoding/levels_test.go
b/go/parquet/internal/encoding/levels_test.go
index fe93b40..a9d98ec 100644
--- a/go/parquet/internal/encoding/levels_test.go
+++ b/go/parquet/internal/encoding/levels_test.go
@@ -25,6 +25,7 @@ import (
"github.com/apache/arrow/go/arrow/memory"
"github.com/apache/arrow/go/parquet"
"github.com/apache/arrow/go/parquet/internal/encoding"
+ "github.com/apache/arrow/go/parquet/internal/utils"
"github.com/stretchr/testify/assert"
)
@@ -62,7 +63,7 @@ func encodeLevels(t *testing.T, enc parquet.Encoding, maxLvl
int16, numLevels in
encoder.Init(enc, maxLvl, buf)
lvlCount, _ = encoder.Encode(input)
buf.SetOffset(0)
- arrow.Int32Traits.CastFromBytes(buf.Bytes())[0] = int32(encoder.Len())
+ arrow.Int32Traits.CastFromBytes(buf.Bytes())[0] = utils.ToLEInt32(int32(encoder.Len()))
} else {
encoder.Init(enc, maxLvl, buf)
lvlCount, _ = encoder.Encode(input)
diff --git a/go/parquet/internal/encoding/memo_table.go
b/go/parquet/internal/encoding/memo_table.go
index 9a04e6e..acd769e 100644
--- a/go/parquet/internal/encoding/memo_table.go
+++ b/go/parquet/internal/encoding/memo_table.go
@@ -46,6 +46,9 @@ type MemoTable interface {
// CopyValuesSubset is like CopyValues but only copies a subset of values starting
// at the indicated index.
CopyValuesSubset(start int, out interface{})
+
+ WriteOut(out []byte)
+ WriteOutSubset(start int, out []byte)
// Get returns the index of the table the specified value is, and a boolean indicating
// whether or not the value was found in the table. Will panic if val is not the appropriate
// type for the underlying table.
@@ -231,6 +234,14 @@ func (m *binaryMemoTableImpl) CopyValuesSubset(start int,
out interface{}) {
copy(outval, m.builder.Value(start)[0:length])
}
+func (m *binaryMemoTableImpl) WriteOut(out []byte) {
+ m.CopyValues(out)
+}
+
+func (m *binaryMemoTableImpl) WriteOutSubset(start int, out []byte) {
+ m.CopyValuesSubset(start, out)
+}
+
func (m *binaryMemoTableImpl) CopyFixedWidthValues(start, width int, out
[]byte) {
}
@@ -378,3 +389,11 @@ func (m *float64MemoTableImpl) CopyValuesSubset(start int,
out interface{}) {
outval[m.nanIndex] = math.NaN()
}
}
+
+func (m *float64MemoTableImpl) WriteOut(out []byte) {
+ m.CopyValuesSubset(0, arrow.Float64Traits.CastFromBytes(out))
+}
+
+func (m *float64MemoTableImpl) WriteOutSubset(start int, out []byte) {
+ m.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out))
+}
diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go
b/go/parquet/internal/encoding/memo_table_types.gen.go
index 5c4812c..d9707b0 100644
--- a/go/parquet/internal/encoding/memo_table_types.gen.go
+++ b/go/parquet/internal/encoding/memo_table_types.gen.go
@@ -99,6 +99,14 @@ func (m *int32MemoTableImpl) GetOrInsert(val interface{})
(idx int, found bool,
return
}
+func (m *int32MemoTableImpl) WriteOut(out []byte) {
+ m.CopyValuesSubset(0, out)
+}
+
+func (m *int32MemoTableImpl) WriteOutSubset(start int, out []byte) {
+ m.CopyValuesSubset(start, out)
+}
+
func (m *int32MemoTableImpl) CopyValues(out interface{}) {
m.CopyValuesSubset(0, out)
}
@@ -183,6 +191,14 @@ func (m *int64MemoTableImpl) GetOrInsert(val interface{})
(idx int, found bool,
return
}
+func (m *int64MemoTableImpl) WriteOut(out []byte) {
+ m.CopyValuesSubset(0, out)
+}
+
+func (m *int64MemoTableImpl) WriteOutSubset(start int, out []byte) {
+ m.CopyValuesSubset(start, out)
+}
+
func (m *int64MemoTableImpl) CopyValues(out interface{}) {
m.CopyValuesSubset(0, out)
}
@@ -267,6 +283,14 @@ func (m *int96MemoTableImpl) GetOrInsert(val interface{})
(idx int, found bool,
return
}
+func (m *int96MemoTableImpl) WriteOut(out []byte) {
+ m.CopyValuesSubset(0, out)
+}
+
+func (m *int96MemoTableImpl) WriteOutSubset(start int, out []byte) {
+ m.CopyValuesSubset(start, out)
+}
+
func (m *int96MemoTableImpl) CopyValues(out interface{}) {
m.CopyValuesSubset(0, out)
}
@@ -351,6 +375,14 @@ func (m *float32MemoTableImpl) GetOrInsert(val
interface{}) (idx int, found bool
return
}
+func (m *float32MemoTableImpl) WriteOut(out []byte) {
+ m.CopyValuesSubset(0, out)
+}
+
+func (m *float32MemoTableImpl) WriteOutSubset(start int, out []byte) {
+ m.CopyValuesSubset(start, out)
+}
+
func (m *float32MemoTableImpl) CopyValues(out interface{}) {
m.CopyValuesSubset(0, out)
}
diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl
b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl
index 0a0a7af..f809bbf 100644
--- a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl
+++ b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl
@@ -98,6 +98,14 @@ func (m *{{.lower}}MemoTableImpl) GetOrInsert(val
interface{}) (idx int, found b
return
}
+func (m *{{.lower}}MemoTableImpl) WriteOut(out []byte) {
+ m.CopyValuesSubset(0, out)
+}
+
+func (m *{{.lower}}MemoTableImpl) WriteOutSubset(start int, out []byte) {
+ m.CopyValuesSubset(start, out)
+}
+
func (m *{{.lower}}MemoTableImpl) CopyValues(out interface{}) {
m.CopyValuesSubset(0, out)
}
diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go
b/go/parquet/internal/encoding/typed_encoder.gen.go
index 192286f..211f062 100644
--- a/go/parquet/internal/encoding/typed_encoder.gen.go
+++ b/go/parquet/internal/encoding/typed_encoder.gen.go
@@ -131,11 +131,6 @@ func (enc *DictInt32Encoder) Type() parquet.Type {
return parquet.Types.Int32
}
-// WriteDict populates the byte slice with the dictionary index
-func (enc *DictInt32Encoder) WriteDict(out []byte) {
- enc.memo.CopyValues(arrow.Int32Traits.CastFromBytes(out))
-}
-
// Put encodes the values passed in, adding to the index as needed.
func (enc *DictInt32Encoder) Put(in []int32) {
for _, val := range in {
@@ -347,11 +342,6 @@ func (enc *DictInt64Encoder) Type() parquet.Type {
return parquet.Types.Int64
}
-// WriteDict populates the byte slice with the dictionary index
-func (enc *DictInt64Encoder) WriteDict(out []byte) {
- enc.memo.CopyValues(arrow.Int64Traits.CastFromBytes(out))
-}
-
// Put encodes the values passed in, adding to the index as needed.
func (enc *DictInt64Encoder) Put(in []int64) {
for _, val := range in {
@@ -609,11 +599,6 @@ func (enc *DictFloat32Encoder) Type() parquet.Type {
return parquet.Types.Float
}
-// WriteDict populates the byte slice with the dictionary index
-func (enc *DictFloat32Encoder) WriteDict(out []byte) {
- enc.memo.CopyValues(arrow.Float32Traits.CastFromBytes(out))
-}
-
// Put encodes the values passed in, adding to the index as needed.
func (enc *DictFloat32Encoder) Put(in []float32) {
for _, val := range in {
@@ -813,11 +798,6 @@ func (enc *DictFloat64Encoder) Type() parquet.Type {
return parquet.Types.Double
}
-// WriteDict populates the byte slice with the dictionary index
-func (enc *DictFloat64Encoder) WriteDict(out []byte) {
- enc.memo.CopyValues(arrow.Float64Traits.CastFromBytes(out))
-}
-
// Put encodes the values passed in, adding to the index as needed.
func (enc *DictFloat64Encoder) Put(in []float64) {
for _, val := range in {
diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
index 0667143..d2ebbe4 100644
--- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
+++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
@@ -162,11 +162,6 @@ func (enc *Dict{{.Name}}Encoder) Type() parquet.Type {
}
{{if and (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}}
-// WriteDict populates the byte slice with the dictionary index
-func (enc *Dict{{.Name}}Encoder) WriteDict(out []byte) {
- enc.memo.CopyValues({{.prefix}}.{{.Name}}Traits.CastFromBytes(out))
-}
-
// Put encodes the values passed in, adding to the index as needed.
func (enc *Dict{{.Name}}Encoder) Put(in []{{.name}}) {
for _, val := range in {
diff --git a/go/parquet/internal/hashing/xxh3_memo_table.gen.go
b/go/parquet/internal/hashing/xxh3_memo_table.gen.go
index b2ebd87..7c0a67b 100644
--- a/go/parquet/internal/hashing/xxh3_memo_table.gen.go
+++ b/go/parquet/internal/hashing/xxh3_memo_table.gen.go
@@ -21,7 +21,9 @@ package hashing
import (
"math"
+ "github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/bitutil"
+ "github.com/apache/arrow/go/parquet/internal/utils"
)
type payloadInt32 struct {
@@ -83,6 +85,20 @@ func (h *Int32HashTable) CopyValuesSubset(start int, out
[]int32) {
})
}
+func (h *Int32HashTable) WriteOut(out []byte) {
+ h.WriteOutSubset(0, out)
+}
+
+func (h *Int32HashTable) WriteOutSubset(start int, out []byte) {
+ data := arrow.Int32Traits.CastFromBytes(out)
+ h.VisitEntries(func(e *entryInt32) {
+ idx := e.payload.memoIdx - int32(start)
+ if idx >= 0 {
+ data[idx] = utils.ToLEInt32(e.payload.val)
+ }
+ })
+}
+
func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor)
>= h.cap }
func (Int32HashTable) fixHash(v uint64) uint64 {
@@ -231,6 +247,14 @@ func (s *Int32MemoTable) CopyValuesSubset(start int, out
interface{}) {
s.tbl.CopyValuesSubset(start, out.([]int32))
}
+func (s *Int32MemoTable) WriteOut(out []byte) {
+ s.tbl.WriteOut(out)
+}
+
+func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) {
+ s.tbl.WriteOutSubset(start, out)
+}
+
// Get returns the index of the requested value in the hash table or
KeyNotFound
// along with a boolean indicating if it was found or not.
func (s *Int32MemoTable) Get(val interface{}) (int, bool) {
@@ -321,6 +345,20 @@ func (h *Int64HashTable) CopyValuesSubset(start int, out
[]int64) {
})
}
+func (h *Int64HashTable) WriteOut(out []byte) {
+ h.WriteOutSubset(0, out)
+}
+
+func (h *Int64HashTable) WriteOutSubset(start int, out []byte) {
+ data := arrow.Int64Traits.CastFromBytes(out)
+ h.VisitEntries(func(e *entryInt64) {
+ idx := e.payload.memoIdx - int32(start)
+ if idx >= 0 {
+ data[idx] = utils.ToLEInt64(e.payload.val)
+ }
+ })
+}
+
func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor)
>= h.cap }
func (Int64HashTable) fixHash(v uint64) uint64 {
@@ -469,6 +507,14 @@ func (s *Int64MemoTable) CopyValuesSubset(start int, out
interface{}) {
s.tbl.CopyValuesSubset(start, out.([]int64))
}
+func (s *Int64MemoTable) WriteOut(out []byte) {
+ s.tbl.WriteOut(out)
+}
+
+func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) {
+ s.tbl.WriteOutSubset(start, out)
+}
+
// Get returns the index of the requested value in the hash table or
KeyNotFound
// along with a boolean indicating if it was found or not.
func (s *Int64MemoTable) Get(val interface{}) (int, bool) {
@@ -559,6 +605,20 @@ func (h *Float32HashTable) CopyValuesSubset(start int, out
[]float32) {
})
}
+func (h *Float32HashTable) WriteOut(out []byte) {
+ h.WriteOutSubset(0, out)
+}
+
+func (h *Float32HashTable) WriteOutSubset(start int, out []byte) {
+ data := arrow.Float32Traits.CastFromBytes(out)
+ h.VisitEntries(func(e *entryFloat32) {
+ idx := e.payload.memoIdx - int32(start)
+ if idx >= 0 {
+ data[idx] = utils.ToLEFloat32(e.payload.val)
+ }
+ })
+}
+
func (h *Float32HashTable) needUpsize() bool { return
h.size*uint64(loadFactor) >= h.cap }
func (Float32HashTable) fixHash(v uint64) uint64 {
@@ -707,6 +767,14 @@ func (s *Float32MemoTable) CopyValuesSubset(start int, out
interface{}) {
s.tbl.CopyValuesSubset(start, out.([]float32))
}
+func (s *Float32MemoTable) WriteOut(out []byte) {
+ s.tbl.WriteOut(out)
+}
+
+func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) {
+ s.tbl.WriteOutSubset(start, out)
+}
+
// Get returns the index of the requested value in the hash table or
KeyNotFound
// along with a boolean indicating if it was found or not.
func (s *Float32MemoTable) Get(val interface{}) (int, bool) {
@@ -816,6 +884,20 @@ func (h *Float64HashTable) CopyValuesSubset(start int, out
[]float64) {
})
}
+func (h *Float64HashTable) WriteOut(out []byte) {
+ h.WriteOutSubset(0, out)
+}
+
+func (h *Float64HashTable) WriteOutSubset(start int, out []byte) {
+ data := arrow.Float64Traits.CastFromBytes(out)
+ h.VisitEntries(func(e *entryFloat64) {
+ idx := e.payload.memoIdx - int32(start)
+ if idx >= 0 {
+ data[idx] = utils.ToLEFloat64(e.payload.val)
+ }
+ })
+}
+
func (h *Float64HashTable) needUpsize() bool { return
h.size*uint64(loadFactor) >= h.cap }
func (Float64HashTable) fixHash(v uint64) uint64 {
@@ -964,6 +1046,14 @@ func (s *Float64MemoTable) CopyValuesSubset(start int,
out interface{}) {
s.tbl.CopyValuesSubset(start, out.([]float64))
}
+func (s *Float64MemoTable) WriteOut(out []byte) {
+ s.tbl.WriteOut(out)
+}
+
+func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) {
+ s.tbl.WriteOutSubset(start, out)
+}
+
// Get returns the index of the requested value in the hash table or
KeyNotFound
// along with a boolean indicating if it was found or not.
func (s *Float64MemoTable) Get(val interface{}) (int, bool) {
diff --git a/go/parquet/internal/hashing/xxh3_memo_table.gen.go.tmpl
b/go/parquet/internal/hashing/xxh3_memo_table.gen.go.tmpl
index a56009b..7a84f1c 100644
--- a/go/parquet/internal/hashing/xxh3_memo_table.gen.go.tmpl
+++ b/go/parquet/internal/hashing/xxh3_memo_table.gen.go.tmpl
@@ -18,6 +18,7 @@ package hashing
import (
"github.com/apache/arrow/go/arrow/bitutil"
+ "github.com/apache/arrow/go/parquet/internal/utils"
)
{{range .In}}
@@ -80,6 +81,20 @@ func (h *{{.Name}}HashTable) CopyValuesSubset(start int, out
[]{{.name}}) {
})
}
+func (h *{{.Name}}HashTable) WriteOut(out []byte) {
+ h.WriteOutSubset(0, out)
+}
+
+func (h *{{.Name}}HashTable) WriteOutSubset(start int, out []byte) {
+ data := arrow.{{.Name}}Traits.CastFromBytes(out)
+ h.VisitEntries(func(e *entry{{.Name}}) {
+ idx := e.payload.memoIdx - int32(start)
+ if idx >= 0 {
+ data[idx] = utils.ToLE{{.Name}}(e.payload.val)
+ }
+ })
+}
+
func (h *{{.Name}}HashTable) needUpsize() bool { return
h.size*uint64(loadFactor) >= h.cap }
func ({{.Name}}HashTable) fixHash(v uint64) uint64 {
@@ -228,6 +243,14 @@ func (s *{{.Name}}MemoTable) CopyValuesSubset(start int,
out interface{}) {
s.tbl.CopyValuesSubset(start, out.([]{{.name}}))
}
+func (s *{{.Name}}MemoTable) WriteOut(out []byte) {
+ s.tbl.WriteOut(out)
+}
+
+func (s *{{.Name}}MemoTable) WriteOutSubset(start int, out []byte) {
+ s.tbl.WriteOutSubset(start, out)
+}
+
// Get returns the index of the requested value in the hash table or
KeyNotFound
// along with a boolean indicating if it was found or not.
func (s *{{.Name}}MemoTable) Get(val interface{}) (int, bool) {
diff --git a/go/parquet/internal/hashing/xxh3_memo_table.go
b/go/parquet/internal/hashing/xxh3_memo_table.go
index c1a7058..db15941 100644
--- a/go/parquet/internal/hashing/xxh3_memo_table.go
+++ b/go/parquet/internal/hashing/xxh3_memo_table.go
@@ -339,6 +339,14 @@ func (b *BinaryMemoTable) CopyValuesSubset(start int, out
interface{}) {
copy(outval, b.builder.Value(start)[0:length])
}
+func (b *BinaryMemoTable) WriteOut(out []byte) {
+ b.CopyValues(out)
+}
+
+func (b *BinaryMemoTable) WriteOutSubset(start int, out []byte) {
+ b.CopyValuesSubset(start, out)
+}
+
// CopyFixedWidthValues exists to cope with the fact that the table doesn't
keep
// track of the fixed width when inserting the null value the databuffer holds
a
// zero length byte slice for the null value (if found)
diff --git a/go/parquet/internal/utils/bit_block_counter.go
b/go/parquet/internal/utils/bit_block_counter.go
index 119171a..3576ac9 100644
--- a/go/parquet/internal/utils/bit_block_counter.go
+++ b/go/parquet/internal/utils/bit_block_counter.go
@@ -22,21 +22,10 @@ import (
"unsafe"
"github.com/apache/arrow/go/arrow/bitutil"
- "github.com/apache/arrow/go/arrow/endian"
)
-var toLEFunc func(uint64) uint64
-
-func init() {
- if endian.IsBigEndian {
- toLEFunc = bits.ReverseBytes64
- } else {
- toLEFunc = func(in uint64) uint64 { return in }
- }
-}
-
func loadWord(byt []byte) uint64 {
- return toLEFunc(*(*uint64)(unsafe.Pointer(&byt[0])))
+ return ToLEUint64(*(*uint64)(unsafe.Pointer(&byt[0])))
}
func shiftWord(current, next uint64, shift int64) uint64 {
diff --git a/go/parquet/internal/utils/bit_run_reader.go
b/go/parquet/internal/utils/bit_run_reader.go
index 7af0aa0..47ddaa3 100644
--- a/go/parquet/internal/utils/bit_run_reader.go
+++ b/go/parquet/internal/utils/bit_run_reader.go
@@ -137,7 +137,7 @@ func (b *bitRunReader) loadWord(bitsRemaining int64) {
bitutil.SetBitTo(wordptr, int(bitsRemaining),
bitutil.BitIsNotSet(wordptr, int(bitsRemaining-1)))
// reset the value to little endian for big endian architectures
- b.word = toLEFunc(b.word)
+ b.word = ToLEUint64(b.word)
}
// Two cases:
diff --git a/go/parquet/internal/utils/endians_default.go
b/go/parquet/internal/utils/endians_default.go
new file mode 100644
index 0000000..c996d95
--- /dev/null
+++ b/go/parquet/internal/utils/endians_default.go
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !s390x
+
+package utils
+
+var (
+ ToLEUint32 = func(x uint32) uint32 { return x }
+ ToLEUint64 = func(x uint64) uint64 { return x }
+ ToLEInt32 = func(x int32) int32 { return x }
+ ToLEInt64 = func(x int64) int64 { return x }
+ ToLEFloat32 = func(x float32) float32 { return x }
+ ToLEFloat64 = func(x float64) float64 { return x }
+)
diff --git a/go/parquet/internal/utils/endians_s390x.go
b/go/parquet/internal/utils/endians_s390x.go
new file mode 100644
index 0000000..86d97a7
--- /dev/null
+++ b/go/parquet/internal/utils/endians_s390x.go
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package utils
+
+import (
+ "math"
+ "math/bits"
+)
+
+var (
+ ToLEUint32 = bits.ReverseBytes32
+ ToLEUint64 = bits.ReverseBytes64
+ ToLEInt32 = func(x int32) int32 { return
int32(bits.ReverseBytes32(uint32(x))) }
+ ToLEInt64 = func(x int64) int64 { return
int64(bits.ReverseBytes64(uint64(x))) }
+ ToLEFloat32 = func(x float32) float32 { return
math.Float32frombits(bits.ReverseBytes32(math.Float32bits(x))) }
+ ToLEFloat64 = func(x float64) float64 { return
math.Float64frombits(bits.ReverseBytes64(math.Float64bits(x))) }
+)
diff --git a/go/parquet/metadata/statistics.go
b/go/parquet/metadata/statistics.go
index 85910c8..d03af87 100644
--- a/go/parquet/metadata/statistics.go
+++ b/go/parquet/metadata/statistics.go
@@ -27,6 +27,7 @@ import (
"github.com/apache/arrow/go/parquet"
"github.com/apache/arrow/go/parquet/internal/debug"
"github.com/apache/arrow/go/parquet/internal/encoding"
+ "github.com/apache/arrow/go/parquet/internal/utils"
format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
"github.com/apache/arrow/go/parquet/schema"
)
@@ -431,16 +432,20 @@ func (Float64Statistics) less(a, b float64) bool { return
a < b }
func (s *Int96Statistics) less(a, b parquet.Int96) bool {
i96a := arrow.Uint32Traits.CastFromBytes(a[:])
i96b := arrow.Uint32Traits.CastFromBytes(b[:])
- if i96a[2] != i96b[2] {
+
+ a0, a1, a2 := utils.ToLEUint32(i96a[0]), utils.ToLEUint32(i96a[1]),
utils.ToLEUint32(i96a[2])
+ b0, b1, b2 := utils.ToLEUint32(i96b[0]), utils.ToLEUint32(i96b[1]),
utils.ToLEUint32(i96b[2])
+
+ if a2 != b2 {
// only the msb bit is by signed comparison
if s.order == schema.SortSIGNED {
- return int32(i96a[2]) < int32(i96b[2])
+ return int32(a2) < int32(b2)
}
- return i96a[2] < i96b[2]
- } else if i96a[1] != i96b[1] {
- return i96a[1] < i96b[1]
+ return a2 < b2
+ } else if a1 != b1 {
+ return a1 < b1
}
- return i96a[0] < i96b[0]
+ return a0 < b0
}
func (s *ByteArrayStatistics) less(a, b parquet.ByteArray) bool {