zeroshade commented on code in PR #13768: URL: https://github.com/apache/arrow/pull/13768#discussion_r937040390
##########
go/arrow/bitutil/bitmaps.go:
##########
@@ -422,3 +423,81 @@ func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
 	dst[nbytes-1] &= ^trailMask
 	dst[nbytes-1] |= lastData & trailMask
 }
+
+type bitOp struct {
+	opWord func(uint64, uint64) uint64
+	opByte func(byte, byte) byte
+}
+
+var (
+	bitAndOp = bitOp{
+		opWord: func(l, r uint64) uint64 { return l & r },
+		opByte: func(l, r byte) byte { return l & r },
+	}
+	bitOrOp = bitOp{
+		opWord: func(l, r uint64) uint64 { return l | r },
+		opByte: func(l, r byte) byte { return l | r },
+	}
+)
+
+func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) {
+	debug.Assert(lOffset%8 == rOffset%8, "aligned bitmap op called with unaligned offsets")
+	debug.Assert(lOffset%8 == outOffset%8, "aligned bitmap op called with unaligned output offset")
+
+	nbytes := BytesForBits(length + lOffset%8)
+	left = left[lOffset/8:]
+	right = right[rOffset/8:]
+	out = out[outOffset/8:]
+	for i := int64(0); i < nbytes; i++ {
+		out[i] = op.opByte(left[i], right[i])
+	}
+}
+
+func unalignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) {
+	leftRdr := NewBitmapWordReader(left, int(lOffset), int(length))
+	rightRdr := NewBitmapWordReader(right, int(rOffset), int(length))
+	writer := NewBitmapWordWriter(out, int(outOffset), int(length))
+
+	for nwords := leftRdr.Words(); nwords > 0; nwords-- {
+		writer.PutNextWord(op.opWord(leftRdr.NextWord(), rightRdr.NextWord()))

Review Comment:
Well this is much better...
```
goos: linux
goarch: amd64
pkg: github.com/apache/arrow/go/v10/arrow/bitutil
cpu: 12th Gen Intel(R) Core(TM) i7-12700H
BenchmarkBitmapAnd/nbytes=32768/0-20     1110499      1078 ns/op  60797.80 MB/s    0 B/op  0 allocs/op
BenchmarkBitmapAnd/nbytes=32768/1-20       29881     39699 ns/op   1650.82 MB/s  192 B/op  3 allocs/op
BenchmarkBitmapAnd/nbytes=32768/2-20       30028     39396 ns/op   1663.50 MB/s  192 B/op  3 allocs/op
BenchmarkBitmapAnd/nbytes=131072/0-20     254810      4698 ns/op  55795.91 MB/s    0 B/op  0 allocs/op
BenchmarkBitmapAnd/nbytes=131072/1-20       7418    159083 ns/op   1647.85 MB/s  192 B/op  3 allocs/op
BenchmarkBitmapAnd/nbytes=131072/2-20       7378    157388 ns/op   1665.59 MB/s  192 B/op  3 allocs/op
```

:smile: Though it looks like I should make a card to look into similarly speeding up the unaligned cases (by way of making the bitmap word reader and writer faster)

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org