This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 42d10c3 ARROW-13964: MINOR: [Go][Parquet] remove base bitmap
reader/writer from parquet module, use arrow bitutil ones
42d10c3 is described below
commit 42d10c3851e8fb2dd723b43fee2dfb746b97898a
Author: Matthew Topol <[email protected]>
AuthorDate: Thu Sep 9 16:17:54 2021 -0400
ARROW-13964: MINOR: [Go][Parquet] remove base bitmap reader/writer from
parquet module, use arrow bitutil ones
ARROW-13963 moved the implementations of the bitmap reader and bitmap
writer for Go into the Arrow bitutil package. This commit removes them from
the Parquet internal packages and makes the Parquet implementation use the
Arrow bitutil versions instead. There are no functional changes; the
implementation is simply moved to a shared location so that continued work on
the Go Arrow implementation can use the same bitmap reader/writer instead of
keeping its own copy of the Parquet implementation.
Closes #11126 from zeroshade/parquetbits
Authored-by: Matthew Topol <[email protected]>
Signed-off-by: Matthew Topol <[email protected]>
---
go/arrow/bitutil/bitmaps_test.go | 100 +++++++++++++
go/parquet/go.mod | 6 +-
go/parquet/go.sum | 79 +++++++---
go/parquet/internal/encoding/boolean_encoder.go | 2 +-
go/parquet/internal/utils/bit_benchmark_test.go | 105 +-------------
.../internal/utils/bit_block_counter_test.go | 4 +-
go/parquet/internal/utils/bit_run_reader_test.go | 2 +-
.../internal/utils/bit_set_run_reader_test.go | 30 ++--
go/parquet/internal/utils/bitmap_reader.go | 72 ----------
go/parquet/internal/utils/bitmap_reader_test.go | 75 ----------
go/parquet/internal/utils/bitmap_writer.go | 160 ++-------------------
11 files changed, 197 insertions(+), 438 deletions(-)
diff --git a/go/arrow/bitutil/bitmaps_test.go b/go/arrow/bitutil/bitmaps_test.go
index 419fc1b..211949c 100644
--- a/go/arrow/bitutil/bitmaps_test.go
+++ b/go/arrow/bitutil/bitmaps_test.go
@@ -242,3 +242,103 @@ func BenchmarkCopyBitmapWithOffsetBoth(b *testing.B) {
})
}
}
+
+const bufferSize = 1024 * 8
+
+// a naive bitmap reader for a baseline
+
+type NaiveBitmapReader struct {
+ bitmap []byte
+ pos int
+}
+
+func (n *NaiveBitmapReader) IsSet() bool { return
bitutil.BitIsSet(n.bitmap, n.pos) }
+func (n *NaiveBitmapReader) IsNotSet() bool { return !n.IsSet() }
+func (n *NaiveBitmapReader) Next() { n.pos++ }
+
+// naive bitmap writer for a baseline
+
+type NaiveBitmapWriter struct {
+ bitmap []byte
+ pos int
+}
+
+func (n *NaiveBitmapWriter) Set() {
+ byteOffset := n.pos / 8
+ bitOffset := n.pos % 8
+ bitSetMask := uint8(1 << bitOffset)
+ n.bitmap[byteOffset] |= bitSetMask
+}
+
+func (n *NaiveBitmapWriter) Clear() {
+ byteOffset := n.pos / 8
+ bitOffset := n.pos % 8
+ bitClearMask := uint8(0xFF ^ (1 << bitOffset))
+ n.bitmap[byteOffset] &= bitClearMask
+}
+
+func (n *NaiveBitmapWriter) Next() { n.pos++ }
+func (n *NaiveBitmapWriter) Finish() {}
+
+func randomBuffer(nbytes int64) []byte {
+ buf := make([]byte, nbytes)
+ r := rand.New(rand.NewSource(0))
+ r.Read(buf)
+ return buf
+}
+
+func BenchmarkBitmapReader(b *testing.B) {
+ buf := randomBuffer(bufferSize)
+ nbits := bufferSize * 8
+
+ b.Run("naive baseline", func(b *testing.B) {
+ b.SetBytes(2 * bufferSize)
+ for i := 0; i < b.N; i++ {
+ {
+ total := 0
+ rdr := NaiveBitmapReader{buf, 0}
+ for j := 0; j < nbits; j++ {
+ if rdr.IsSet() {
+ total++
+ }
+ rdr.Next()
+ }
+ }
+ {
+ total := 0
+ rdr := NaiveBitmapReader{buf, 0}
+ for j := 0; j < nbits; j++ {
+ if rdr.IsSet() {
+ total++
+ }
+ rdr.Next()
+ }
+ }
+ }
+ })
+ b.Run("bitmap reader", func(b *testing.B) {
+ b.SetBytes(2 * bufferSize)
+ for i := 0; i < b.N; i++ {
+ {
+ total := 0
+ rdr := bitutil.NewBitmapReader(buf, 0, nbits)
+ for j := 0; j < nbits; j++ {
+ if rdr.Set() {
+ total++
+ }
+ rdr.Next()
+ }
+ }
+ {
+ total := 0
+ rdr := bitutil.NewBitmapReader(buf, 0, nbits)
+ for j := 0; j < nbits; j++ {
+ if rdr.Set() {
+ total++
+ }
+ rdr.Next()
+ }
+ }
+ }
+ })
+}
diff --git a/go/parquet/go.mod b/go/parquet/go.mod
index cf2be66..ccb401e 100644
--- a/go/parquet/go.mod
+++ b/go/parquet/go.mod
@@ -21,17 +21,17 @@ go 1.15
require (
github.com/JohnCGriffin/overflow v0.0.0-20170615021017-4d914c927216
github.com/andybalholm/brotli v1.0.1
- github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677
+ github.com/apache/arrow/go/arrow v0.0.0-20210909182554-946bdcf83611
github.com/apache/thrift/lib/go/thrift
v0.0.0-20210120171102-e27e82c46ba4
github.com/golang/snappy v0.0.3
github.com/klauspost/asmfmt v1.2.3
- github.com/klauspost/compress v1.12.2
+ github.com/klauspost/compress v1.13.1
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3
github.com/stretchr/testify v1.7.0
github.com/zeebo/xxh3 v0.10.0
golang.org/x/exp v0.0.0-20210220032938-85be41e4509f
- golang.org/x/sys v0.0.0-20210309074719-68d13333faf2
+ golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
gonum.org/v1/gonum v0.8.2
)
diff --git a/go/parquet/go.sum b/go/parquet/go.sum
index bebc1ff..cf7b678 100644
--- a/go/parquet/go.sum
+++ b/go/parquet/go.sum
@@ -1,4 +1,5 @@
cloud.google.com/go v0.26.0/go.mod
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+cloud.google.com/go v0.34.0/go.mod
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod
h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod
h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
@@ -7,20 +8,28 @@ github.com/JohnCGriffin/overflow
v0.0.0-20170615021017-4d914c927216/go.mod h1:X0
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod
h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/andybalholm/brotli v1.0.1
h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
github.com/andybalholm/brotli v1.0.1/go.mod
h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
-github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677
h1:F7HiqIf4aBsF4YUBcLolXZ8duSEideNnZnr3lBGa2sA=
-github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677/go.mod
h1:R4hW3Ug0s+n4CUsWHKOj00Pu01ZqU4x/hSF5kXUcXKQ=
+github.com/antihax/optional v1.0.0/go.mod
h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
+github.com/apache/arrow/go/arrow v0.0.0-20210909182554-946bdcf83611
h1:17eVDdLcmI8iNy6dDimoLT+5jG3Y68KxZkDkKI1rLuw=
+github.com/apache/arrow/go/arrow v0.0.0-20210909182554-946bdcf83611/go.mod
h1:2qMFB56yOP3KzkB3PbYZ4AlUFg3a88F67TIx5lB/WwY=
github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4
h1:orNYqmQGnSjgOauLWjHEp9/qIDT98xv/0Aa4Zet3/Y8=
github.com/apache/thrift/lib/go/thrift
v0.0.0-20210120171102-e27e82c46ba4/go.mod
h1:V/LzksIyqd3KZuQ2SunvReTG/UkArhII1dAWY5U1sCE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod
h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod
h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
-github.com/davecgh/go-spew v1.1.0
h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
+github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod
h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
+github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod
h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
github.com/davecgh/go-spew v1.1.0/go.mod
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod
h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane
v0.9.1-0.20191026205805-5f8ba28d4473/go.mod
h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod
h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/go-control-plane
v0.9.9-0.20201210154907-fd9021fe5dad/go.mod
h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
+github.com/envoyproxy/go-control-plane
v0.9.9-0.20210217033140-668b12f5399d/go.mod
h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
+github.com/envoyproxy/go-control-plane
v0.9.9-0.20210512163311-63b5d3c536b0/go.mod
h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod
h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
+github.com/ghodss/yaml v1.0.0/go.mod
h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod
h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod
h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
@@ -35,37 +44,45 @@ github.com/golang/protobuf
v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W
github.com/golang/protobuf v1.4.0/go.mod
h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod
h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2/go.mod
h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.4.3/go.mod
h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.5.0/go.mod
h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.2/go.mod
h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
github.com/golang/snappy v0.0.3/go.mod
h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/google/flatbuffers v1.11.0
h1:O7CEyB8Cb3/DmtxODGtLHcEvpr81Jm5qLg/hsHnxA2A=
-github.com/google/flatbuffers v1.11.0/go.mod
h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
+github.com/google/flatbuffers v2.0.0+incompatible
h1:dicJ2oXwypfwUGnB2/TYWYEKiuk9eYQlQO/AnOHl5mI=
+github.com/google/flatbuffers v2.0.0+incompatible/go.mod
h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/go-cmp v0.2.0/go.mod
h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod
h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod
h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.4/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.5/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.6/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/uuid v1.1.2/go.mod
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod
h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod
h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/klauspost/asmfmt v1.2.3
h1:qEM7SLDo6DXXXz5yTpqUoxhsrtwH30nNR2riO2ZjznY=
github.com/klauspost/asmfmt v1.2.3/go.mod
h1:RAoUvqkWr2rUa2I19qKMEVZQe4BVtcHGTMCUOcCU2Lg=
-github.com/klauspost/compress v1.11.13/go.mod
h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/compress v1.12.2
h1:2KCfW3I9M7nSc5wOqXAlW2v2U6v+w6cbjvbfp+OykW8=
-github.com/klauspost/compress v1.12.2/go.mod
h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/klauspost/compress v1.13.1
h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
+github.com/klauspost/compress v1.13.1/go.mod
h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8
h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod
h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3
h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod
h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE=
-github.com/pierrec/lz4/v4 v4.1.4
h1:PjkB+qEooc9nw4F6Pxe/e0xaRdWz3suItXWxWqAO1QE=
-github.com/pierrec/lz4/v4 v4.1.4/go.mod
h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/pierrec/lz4/v4 v4.1.8
h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4=
+github.com/pierrec/lz4/v4 v4.1.8/go.mod
h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pmezard/go-difflib v1.0.0
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/rogpeppe/fastuuid v1.2.0/go.mod
h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/stretchr/objx v0.1.0/go.mod
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/testify v1.2.0/go.mod
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.5.1/go.mod
h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0
h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/yuin/goldmark v1.3.5/go.mod
h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/zeebo/xxh3 v0.10.0 h1:1+2Mov9zfxTNUeoDG9k9i13VfxTR0p1JQu8L0vikxB0=
github.com/zeebo/xxh3 v0.10.0/go.mod
h1:AQY73TOrhF3jNsdiM9zZOb8MThrYbZONHj7ryDBaLpg=
+go.opentelemetry.io/proto/otlp v0.7.0/go.mod
h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod
h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
@@ -83,34 +100,48 @@ golang.org/x/image
v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+o
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod
h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod
h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod
h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
golang.org/x/mobile v0.0.0-20201217150744-e6ae53a27f4f/go.mod
h1:skQtrUTUwhdJvXM/2KKJzY8pDgNr9I/FOMqDVRPBUS4=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.1.1-0.20191209134235-331c550502dd/go.mod
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449/go.mod
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod
h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod
h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod
h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod
h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200727154430-2d971f7391a4/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210309074719-68d13333faf2
h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY=
-golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c
h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
+golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -121,7 +152,9 @@ golang.org/x/tools
v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod
h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200117012304-6edc0a871e69/go.mod
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.1.4/go.mod
h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -137,13 +170,17 @@ google.golang.org/appengine v1.1.0/go.mod
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl
google.golang.org/appengine v1.4.0/go.mod
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod
h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod
h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod
h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
-google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f/go.mod
h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod
h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U=
google.golang.org/grpc v1.19.0/go.mod
h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod
h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod
h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod
h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
-google.golang.org/grpc v1.32.0/go.mod
h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/grpc v1.33.1/go.mod
h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0=
+google.golang.org/grpc v1.36.0/go.mod
h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
+google.golang.org/grpc v1.38.0/go.mod
h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
+google.golang.org/grpc v1.39.0/go.mod
h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod
h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod
h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod
h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
@@ -152,10 +189,14 @@ google.golang.org/protobuf v1.21.0/go.mod
h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi
google.golang.org/protobuf v1.22.0/go.mod
h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod
h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod
h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
-google.golang.org/protobuf v1.24.0/go.mod
h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
google.golang.org/protobuf v1.25.0/go.mod
h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod
h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod
h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.27.1/go.mod
h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405
h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c
h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
diff --git a/go/parquet/internal/encoding/boolean_encoder.go
b/go/parquet/internal/encoding/boolean_encoder.go
index 617eaa9..ba06d4c 100644
--- a/go/parquet/internal/encoding/boolean_encoder.go
+++ b/go/parquet/internal/encoding/boolean_encoder.go
@@ -69,7 +69,7 @@ func (enc *PlainBooleanEncoder) PutSpaced(in []bool,
validBits []byte, validBits
// EstimatedDataEncodedSize returns the current number of bytes that have
// been buffered so far
func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 {
- return int64(enc.sink.Len() + int(bitutil.BytesForBits(enc.wr.Pos())))
+ return int64(enc.sink.Len() +
int(bitutil.BytesForBits(int64(enc.wr.Pos()))))
}
// FlushValues returns the buffered data, the responsibility is on the caller
diff --git a/go/parquet/internal/utils/bit_benchmark_test.go
b/go/parquet/internal/utils/bit_benchmark_test.go
index d91ab55..7539dfe 100644
--- a/go/parquet/internal/utils/bit_benchmark_test.go
+++ b/go/parquet/internal/utils/bit_benchmark_test.go
@@ -17,7 +17,6 @@
package utils_test
import (
- "math/rand"
"strconv"
"testing"
@@ -26,50 +25,6 @@ import (
"github.com/apache/arrow/go/parquet/internal/utils"
)
-const bufferSize = 1024 * 8
-
-// a naive bitmap reader for a baseline
-
-type NaiveBitmapReader struct {
- bitmap []byte
- pos int
-}
-
-func (n *NaiveBitmapReader) IsSet() bool { return
bitutil.BitIsSet(n.bitmap, n.pos) }
-func (n *NaiveBitmapReader) IsNotSet() bool { return !n.IsSet() }
-func (n *NaiveBitmapReader) Next() { n.pos++ }
-
-// naive bitmap writer for a baseline
-
-type NaiveBitmapWriter struct {
- bitmap []byte
- pos int
-}
-
-func (n *NaiveBitmapWriter) Set() {
- byteOffset := n.pos / 8
- bitOffset := n.pos % 8
- bitSetMask := uint8(1 << bitOffset)
- n.bitmap[byteOffset] |= bitSetMask
-}
-
-func (n *NaiveBitmapWriter) Clear() {
- byteOffset := n.pos / 8
- bitOffset := n.pos % 8
- bitClearMask := uint8(0xFF ^ (1 << bitOffset))
- n.bitmap[byteOffset] &= bitClearMask
-}
-
-func (n *NaiveBitmapWriter) Next() { n.pos++ }
-func (n *NaiveBitmapWriter) Finish() {}
-
-func randomBuffer(nbytes int64) []byte {
- buf := make([]byte, nbytes)
- r := rand.New(rand.NewSource(0))
- r.Read(buf)
- return buf
-}
-
func randomBitsBuffer(nbits, setPct int64) []byte {
rag := testutils.NewRandomArrayGenerator(23)
prob := float64(0)
@@ -80,7 +35,7 @@ func randomBitsBuffer(nbits, setPct int64) []byte {
rag.GenerateBitmap(buf, nbits, prob)
if setPct == -1 {
- wr := utils.NewBitmapWriter(buf, 0, nbits)
+ wr := bitutil.NewBitmapWriter(buf, 0, int(nbits))
for i := int64(0); i < nbits; i++ {
if i%2 == 0 {
wr.Set()
@@ -93,62 +48,6 @@ func randomBitsBuffer(nbits, setPct int64) []byte {
return buf
}
-func BenchmarkBitmapReader(b *testing.B) {
- buf := randomBuffer(bufferSize)
- nbits := bufferSize * 8
-
- b.Run("naive baseline", func(b *testing.B) {
- b.SetBytes(2 * bufferSize)
- for i := 0; i < b.N; i++ {
- {
- total := 0
- rdr := NaiveBitmapReader{buf, 0}
- for j := 0; j < nbits; j++ {
- if rdr.IsSet() {
- total++
- }
- rdr.Next()
- }
- }
- {
- total := 0
- rdr := NaiveBitmapReader{buf, 0}
- for j := 0; j < nbits; j++ {
- if rdr.IsSet() {
- total++
- }
- rdr.Next()
- }
- }
- }
- })
- b.Run("bitmap reader", func(b *testing.B) {
- b.SetBytes(2 * bufferSize)
- for i := 0; i < b.N; i++ {
- {
- total := 0
- rdr := utils.NewBitmapReader(buf, 0,
int64(nbits))
- for j := 0; j < nbits; j++ {
- if rdr.Set() {
- total++
- }
- rdr.Next()
- }
- }
- {
- total := 0
- rdr := utils.NewBitmapReader(buf, 0,
int64(nbits))
- for j := 0; j < nbits; j++ {
- if rdr.Set() {
- total++
- }
- rdr.Next()
- }
- }
- }
- })
-}
-
func testBitRunReader(rdr utils.BitRunReader) (setTotal int64) {
for {
br := rdr.NextRun()
@@ -170,7 +69,7 @@ func BenchmarkBitRunReader(b *testing.B) {
b.Run("linear", func(b *testing.B) {
b.SetBytes(numBits / 8)
for i := 0; i < b.N; i++ {
- rdr :=
linearBitRunReader{utils.NewBitmapReader(buf, 0, numBits)}
+ rdr :=
linearBitRunReader{bitutil.NewBitmapReader(buf, 0, numBits)}
testBitRunReader(rdr)
}
})
diff --git a/go/parquet/internal/utils/bit_block_counter_test.go
b/go/parquet/internal/utils/bit_block_counter_test.go
index 6ec5a87..86c55cd 100644
--- a/go/parquet/internal/utils/bit_block_counter_test.go
+++ b/go/parquet/internal/utils/bit_block_counter_test.go
@@ -100,7 +100,7 @@ func TestOneWordWithOffsets(t *testing.T) {
assert.EqualValues(t, 63, block.Popcnt)
// Set the next word to all false
- utils.SetBitsTo(buf.Bytes(), 2*kWordSize+offset, kWordSize,
false)
+ bitutil.SetBitsTo(buf.Bytes(), 2*kWordSize+offset, kWordSize,
false)
block = counter.NextWord()
assert.EqualValues(t, 64, block.Len)
@@ -150,7 +150,7 @@ func TestFourWordsWithOffsets(t *testing.T) {
assert.EqualValues(t, 253, block.Popcnt)
// set the next two words to all false
- utils.SetBitsTo(buf.Bytes(), 8*kWordSize+offset, 2*kWordSize,
false)
+ bitutil.SetBitsTo(buf.Bytes(), 8*kWordSize+offset, 2*kWordSize,
false)
// block is half set
block = counter.NextFourWords()
diff --git a/go/parquet/internal/utils/bit_run_reader_test.go
b/go/parquet/internal/utils/bit_run_reader_test.go
index e34026e..2b3cc4d 100644
--- a/go/parquet/internal/utils/bit_run_reader_test.go
+++ b/go/parquet/internal/utils/bit_run_reader_test.go
@@ -44,7 +44,7 @@ func TestBitRunReaderZeroLength(t *testing.T) {
func bitmapFromSlice(vals []int, bitOffset int64) []byte {
out := make([]byte,
int(bitutil.BytesForBits(int64(len(vals))+bitOffset)))
- writer := utils.NewBitmapWriter(out, bitOffset, int64(len(vals)))
+ writer := bitutil.NewBitmapWriter(out, int(bitOffset), len(vals))
for _, val := range vals {
if val == 1 {
writer.Set()
diff --git a/go/parquet/internal/utils/bit_set_run_reader_test.go
b/go/parquet/internal/utils/bit_set_run_reader_test.go
index 2165b77..453d103 100644
--- a/go/parquet/internal/utils/bit_set_run_reader_test.go
+++ b/go/parquet/internal/utils/bit_set_run_reader_test.go
@@ -34,7 +34,7 @@ func reverseAny(s interface{}) {
}
type linearBitRunReader struct {
- reader *utils.BitmapReader
+ reader *bitutil.BitmapReader
}
func (l linearBitRunReader) NextRun() utils.BitRun {
@@ -68,16 +68,16 @@ func bitmapFromString(s string) []byte {
return ret[:actualLen]
}
-func referenceBitRuns(data []byte, offset, length int64) (ret
[]utils.SetBitRun) {
+func referenceBitRuns(data []byte, offset, length int) (ret []utils.SetBitRun)
{
ret = make([]utils.SetBitRun, 0)
- reader := linearBitRunReader{utils.NewBitmapReader(data, offset,
length)}
- pos := int64(0)
+ reader := linearBitRunReader{bitutil.NewBitmapReader(data, offset,
length)}
+ pos := 0
for pos < length {
br := reader.NextRun()
if br.Set {
- ret = append(ret, utils.SetBitRun{pos, br.Len})
+ ret = append(ret, utils.SetBitRun{int64(pos), br.Len})
}
- pos += br.Len
+ pos += int(br.Len)
}
return
}
@@ -165,10 +165,10 @@ func (br *BitSetRunReaderSuite) TestOneByte() {
for _, str := range []string{"01101101", "10110110", "00000000",
"11111111"} {
buf := bitmapFromString(str)
- for offset := int64(0); offset < 8; offset++ {
- for length := int64(0); length <= 8-offset; length++ {
+ for offset := 0; offset < 8; offset++ {
+ for length := 0; length <= 8-offset; length++ {
expected := referenceBitRuns(buf, offset,
length)
- br.assertBitRuns(buf, offset, length, expected)
+ br.assertBitRuns(buf, int64(offset),
int64(length), expected)
}
}
}
@@ -215,7 +215,7 @@ func (br *BitSetRunReaderSuite) TestAllZeros() {
func (br *BitSetRunReaderSuite) TestAllOnes() {
const bufferSize = 256
buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
- utils.SetBitsTo(buf, 0, bufferSize, true)
+ bitutil.SetBitsTo(buf, 0, bufferSize, true)
for _, rg := range br.bufferTestRanges(buf) {
if rg.Len > 0 {
@@ -235,9 +235,9 @@ func (br *BitSetRunReaderSuite) TestSmall() {
)
buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
- utils.SetBitsTo(buf, 0, bufferSize, false)
- utils.SetBitsTo(buf, 0, onesLen, true)
- utils.SetBitsTo(buf, secondOnesStart, onesLen, true)
+ bitutil.SetBitsTo(buf, 0, bufferSize, false)
+ bitutil.SetBitsTo(buf, 0, onesLen, true)
+ bitutil.SetBitsTo(buf, secondOnesStart, onesLen, true)
for _, rg := range br.bufferTestRanges(buf) {
expected := []utils.SetBitRun{}
@@ -257,8 +257,8 @@ func (br *BitSetRunReaderSuite) TestSingleRun() {
buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
for _, onesRg := range br.bufferTestRanges(buf) {
- utils.SetBitsTo(buf, 0, bufferSize, false)
- utils.SetBitsTo(buf, onesRg.Offset, onesRg.Len, true)
+ bitutil.SetBitsTo(buf, 0, bufferSize, false)
+ bitutil.SetBitsTo(buf, onesRg.Offset, onesRg.Len, true)
for _, rg := range br.bufferTestRanges(buf) {
expect := []utils.SetBitRun{}
diff --git a/go/parquet/internal/utils/bitmap_reader.go b/go/parquet/internal/utils/bitmap_reader.go
deleted file mode 100644
index ace63bb..0000000
--- a/go/parquet/internal/utils/bitmap_reader.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-// BitmapReader is a simple bitmap reader for a byte slice.
-type BitmapReader struct {
- bitmap []byte
- pos int64
- len int64
-
- current byte
- byteOffset int64
- bitOffset int64
-}
-
-// NewBitmapReader creates and returns a new bitmap reader for the given bitmap
-func NewBitmapReader(bitmap []byte, offset, length int64) *BitmapReader {
- curbyte := byte(0)
- if length > 0 && bitmap != nil {
- curbyte = bitmap[offset/8]
- }
- return &BitmapReader{
- bitmap: bitmap,
- byteOffset: offset / 8,
- bitOffset: offset % 8,
- current: curbyte,
- len: length,
- }
-}
-
-// Set returns true if the current bit is set
-func (b *BitmapReader) Set() bool {
- return (b.current & (1 << b.bitOffset)) != 0
-}
-
-// NotSet returns true if the current bit is not set
-func (b *BitmapReader) NotSet() bool {
- return (b.current & (1 << b.bitOffset)) == 0
-}
-
-// Next advances the reader to the next bit in the bitmap.
-func (b *BitmapReader) Next() {
- b.bitOffset++
- b.pos++
- if b.bitOffset == 8 {
- b.bitOffset = 0
- b.byteOffset++
- if b.pos < b.len {
- b.current = b.bitmap[int(b.byteOffset)]
- }
- }
-}
-
-// Pos returns the current bit position in the bitmap that the reader is looking at
-func (b *BitmapReader) Pos() int64 { return b.pos }
-
-// Len returns the total number of bits in the bitmap
-func (b *BitmapReader) Len() int64 { return b.len }
diff --git a/go/parquet/internal/utils/bitmap_reader_test.go b/go/parquet/internal/utils/bitmap_reader_test.go
deleted file mode 100644
index e9a7eda..0000000
--- a/go/parquet/internal/utils/bitmap_reader_test.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils_test
-
-import (
- "testing"
-
- "github.com/apache/arrow/go/parquet/internal/utils"
- "github.com/stretchr/testify/assert"
-)
-
-func assertReaderVals(t *testing.T, reader *utils.BitmapReader, vals []bool) {
- for _, v := range vals {
- if v {
- assert.True(t, reader.Set())
- assert.False(t, reader.NotSet())
- } else {
- assert.True(t, reader.NotSet())
- assert.False(t, reader.Set())
- }
- reader.Next()
- }
-}
-
-func TestNormalOperation(t *testing.T) {
- for _, offset := range []int64{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} {
- buf := bitmapFromSlice([]int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset)
-
- reader := utils.NewBitmapReader(buf, offset, 14)
- assertReaderVals(t, reader, []bool{false, true, true, true, false, false, false, true, false, true, false, true, false, true})
- }
-}
-
-func TestDoesNotReadOutOfBounds(t *testing.T) {
- var bitmap [16]byte
- const length = 128
-
- reader := utils.NewBitmapReader(bitmap[:], 0, length)
- assert.EqualValues(t, length, reader.Len())
- assert.NotPanics(t, func() {
- for i := 0; i < length; i++ {
- assert.True(t, reader.NotSet())
- reader.Next()
- }
- })
- assert.EqualValues(t, length, reader.Pos())
-
- reader = utils.NewBitmapReader(bitmap[:], 5, length-5)
- assert.EqualValues(t, length-5, reader.Len())
- assert.NotPanics(t, func() {
- for i := 0; i < length-5; i++ {
- assert.True(t, reader.NotSet())
- reader.Next()
- }
- })
- assert.EqualValues(t, length-5, reader.Pos())
-
- assert.NotPanics(t, func() {
- reader = utils.NewBitmapReader(nil, 0, 0)
- })
-}
diff --git a/go/parquet/internal/utils/bitmap_writer.go b/go/parquet/internal/utils/bitmap_writer.go
index f7c1f7a..c386705 100644
--- a/go/parquet/internal/utils/bitmap_writer.go
+++ b/go/parquet/internal/utils/bitmap_writer.go
@@ -18,71 +18,11 @@ package utils
import (
"encoding/binary"
- "math"
"math/bits"
"github.com/apache/arrow/go/arrow/bitutil"
- "github.com/apache/arrow/go/arrow/memory"
)
-var (
- // PrecedingBitmask is a convenience set of values as bitmasks for checking
- // prefix bits of a byte
- PrecedingBitmask = [8]byte{0, 1, 3, 7, 15, 31, 63, 127}
- // TrailingBitmask is the bitwise complement version of kPrecedingBitmask
- TrailingBitmask = [8]byte{255, 254, 252, 248, 240, 224, 192, 128}
-)
-
-// SetBitsTo is a convenience function to quickly set or unset all the bits
-// in a bitmap starting at startOffset for length bits.
-func SetBitsTo(bits []byte, startOffset, length int64, areSet bool) {
- if length == 0 {
- return
- }
-
- beg := startOffset
- end := startOffset + length
- var fill uint8 = 0
- if areSet {
- fill = math.MaxUint8
- }
-
- byteBeg := beg / 8
- byteEnd := end/8 + 1
-
- // don't modify bits before the startOffset by using this mask
- firstByteMask := PrecedingBitmask[beg%8]
- // don't modify bits past the length by using this mask
- lastByteMask := TrailingBitmask[end%8]
-
- if byteEnd == byteBeg+1 {
- // set bits within a single byte
- onlyByteMask := firstByteMask
- if end%8 != 0 {
- onlyByteMask = firstByteMask | lastByteMask
- }
-
- bits[byteBeg] &= onlyByteMask
- bits[byteBeg] |= fill &^ onlyByteMask
- return
- }
-
- // set/clear trailing bits of first byte
- bits[byteBeg] &= firstByteMask
- bits[byteBeg] |= fill &^ firstByteMask
-
- if byteEnd-byteBeg > 2 {
- memory.Set(bits[byteBeg+1:byteEnd-1], fill)
- }
-
- if end%8 == 0 {
- return
- }
-
- bits[byteEnd-1] &= lastByteMask
- bits[byteEnd-1] |= fill &^ lastByteMask
-}
-
// BitmapWriter is an interface for bitmap writers so that we can use multiple
// implementations or swap if necessary.
type BitmapWriter interface {
@@ -100,96 +40,22 @@ type BitmapWriter interface {
 // of bools that were able to fit in the remaining length of the bitmapwriter.
AppendBools(in []bool) int
// Pos is the current position that will be written next
- Pos() int64
+ Pos() int
 // Reset allows reusing the bitmapwriter by resetting Pos to start with length as
// the number of bits that the writer can write.
- Reset(start, length int64)
+ Reset(start, length int)
}
type bitmapWriter struct {
- buf []byte
- pos int64
- length int64
-
- curByte uint8
- bitMask uint8
- byteOffset int64
-}
-
-// NewBitmapWriter returns a sequential bitwise writer that preserves surrounding
-// bit values as it writes.
-func NewBitmapWriter(bitmap []byte, start, length int64) BitmapWriter {
- ret := &bitmapWriter{
- buf: bitmap,
- length: length,
- byteOffset: start / 8,
- bitMask: bitutil.BitMask[start%8],
- }
- if length > 0 {
- ret.curByte = bitmap[int(ret.byteOffset)]
- }
- return ret
-}
-
-func (b *bitmapWriter) Reset(start, length int64) {
- b.pos = 0
- b.byteOffset = start / 8
- b.bitMask = bitutil.BitMask[start%8]
- b.length = length
- if b.length > 0 {
- b.curByte = b.buf[int(b.byteOffset)]
- }
-}
-
-func (b *bitmapWriter) Pos() int64 { return b.pos }
-func (b *bitmapWriter) Set() { b.curByte |= b.bitMask }
-func (b *bitmapWriter) Clear() { b.curByte &= ^b.bitMask }
-
-func (b *bitmapWriter) Next() {
- b.bitMask = b.bitMask << 1
- b.pos++
- if b.bitMask == 0 {
- b.bitMask = 0x01
- b.buf[b.byteOffset] = b.curByte
- b.byteOffset++
- if b.pos < b.length {
- b.curByte = b.buf[int(b.byteOffset)]
- }
- }
-}
-
-func (b *bitmapWriter) AppendBools(in []bool) int {
- space := Min(bitutil.BytesForBits(b.length-b.pos), int64(len(in)))
-
- // location that the first byte needs to be written to for appending
- appslice := b.buf[int(b.byteOffset):]
- // update everything but curByte
- bitOffset := bits.TrailingZeros32(uint32(b.bitMask))
- appslice[0] = b.curByte
- for i, b := range in[:space] {
- if b {
- bitutil.SetBit(appslice, i)
- } else {
- bitutil.ClearBit(appslice, i)
- }
- }
-
- b.pos += space
- b.bitMask = bitutil.BitMask[(int64(bitOffset)+space)%8]
- b.byteOffset += (int64(bitOffset) + space) / 8
- b.curByte = appslice[len(appslice)-1]
-
- return int(space)
+ *bitutil.BitmapWriter
}
-func (b *bitmapWriter) Finish() {
- if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) {
- b.buf[int(b.byteOffset)] = b.curByte
- }
+func NewBitmapWriter(bitmap []byte, start, length int) BitmapWriter {
+ return &bitmapWriter{bitutil.NewBitmapWriter(bitmap, start, length)}
}
func (b *bitmapWriter) AppendWord(uint64, int64) {
- panic("AppendWord not implemented")
+ panic("unimplemented")
}
type firstTimeBitmapWriter struct {
@@ -213,24 +79,24 @@ func NewFirstTimeBitmapWriter(buf []byte, start, length int64) BitmapWriter {
length: length,
}
if length > 0 {
- ret.curByte = ret.buf[int(ret.byteOffset)] & PrecedingBitmask[start%8]
+ ret.curByte = ret.buf[int(ret.byteOffset)] & bitutil.PrecedingBitmask[start%8]
}
return ret
}
var endianBuffer [8]byte
-func (bw *firstTimeBitmapWriter) Reset(start, length int64) {
+func (bw *firstTimeBitmapWriter) Reset(start, length int) {
bw.pos = 0
- bw.byteOffset = start / 8
+ bw.byteOffset = int64(start / 8)
bw.bitMask = bitutil.BitMask[start%8]
- bw.length = length
+ bw.length = int64(length)
if length > 0 {
- bw.curByte = bw.buf[int(bw.byteOffset)] & PrecedingBitmask[start%8]
+ bw.curByte = bw.buf[int(bw.byteOffset)] & bitutil.PrecedingBitmask[start%8]
}
}
-func (bw *firstTimeBitmapWriter) Pos() int64 { return bw.pos }
+func (bw *firstTimeBitmapWriter) Pos() int { return int(bw.pos) }
func (bw *firstTimeBitmapWriter) AppendWord(word uint64, nbits int64) {
if nbits == 0 {
return
@@ -251,7 +117,7 @@ func (bw *firstTimeBitmapWriter) AppendWord(word uint64, nbits int64) {
carry := 8 - bitOffset
 // Carry over bits from word to curByte. We assume any extra bits in word are unset
// so no additional accounting is needed for when nbits < carry
- bw.curByte |= uint8((word & uint64(PrecedingBitmask[carry])) << bitOffset)
+ bw.curByte |= uint8((word & uint64(bitutil.PrecedingBitmask[carry])) << bitOffset)
// check everything was transferred to curByte
if nbits < int64(carry) {
return