zeroshade commented on code in PR #13768:
URL: https://github.com/apache/arrow/pull/13768#discussion_r937040390


##########
go/arrow/bitutil/bitmaps.go:
##########
@@ -422,3 +423,81 @@ func CopyBitmap(src []byte, srcOffset, length int, dst 
[]byte, dstOffset int) {
        dst[nbytes-1] &= ^trailMask
        dst[nbytes-1] |= lastData & trailMask
 }
+
+type bitOp struct {
+       opWord func(uint64, uint64) uint64
+       opByte func(byte, byte) byte
+}
+
+var (
+       bitAndOp = bitOp{
+               opWord: func(l, r uint64) uint64 { return l & r },
+               opByte: func(l, r byte) byte { return l & r },
+       }
+       bitOrOp = bitOp{
+               opWord: func(l, r uint64) uint64 { return l | r },
+               opByte: func(l, r byte) byte { return l | r },
+       }
+)
+
+func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out 
[]byte, outOffset int64, length int64) {
+       debug.Assert(lOffset%8 == rOffset%8, "aligned bitmap op called with 
unaligned offsets")
+       debug.Assert(lOffset%8 == outOffset%8, "aligned bitmap op called with 
unaligned output offset")
+
+       nbytes := BytesForBits(length + lOffset%8)
+       left = left[lOffset/8:]
+       right = right[rOffset/8:]
+       out = out[outOffset/8:]
+       for i := int64(0); i < nbytes; i++ {
+               out[i] = op.opByte(left[i], right[i])
+       }
+}
+
+func unalignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, 
out []byte, outOffset int64, length int64) {
+       leftRdr := NewBitmapWordReader(left, int(lOffset), int(length))
+       rightRdr := NewBitmapWordReader(right, int(rOffset), int(length))
+       writer := NewBitmapWordWriter(out, int(outOffset), int(length))
+
+       for nwords := leftRdr.Words(); nwords > 0; nwords-- {
+               writer.PutNextWord(op.opWord(leftRdr.NextWord(), 
rightRdr.NextWord()))

Review Comment:
   Well this is much better...
   
   ```
   goos: linux
   goarch: amd64
   pkg: github.com/apache/arrow/go/v10/arrow/bitutil
   cpu: 12th Gen Intel(R) Core(TM) i7-12700H
   BenchmarkBitmapAnd/nbytes=32768/0-20          1110499              1078 
ns/op        60797.80 MB/s          0 B/op          0 allocs/op
   BenchmarkBitmapAnd/nbytes=32768/1-20            29881             39699 
ns/op        1650.82 MB/s         192 B/op          3 allocs/op
   BenchmarkBitmapAnd/nbytes=32768/2-20            30028             39396 
ns/op        1663.50 MB/s         192 B/op          3 allocs/op
   BenchmarkBitmapAnd/nbytes=131072/0-20          254810              4698 
ns/op        55795.91 MB/s          0 B/op          0 allocs/op
   BenchmarkBitmapAnd/nbytes=131072/1-20            7418            159083 
ns/op        1647.85 MB/s         192 B/op          3 allocs/op
   BenchmarkBitmapAnd/nbytes=131072/2-20            7378            157388 
ns/op        1665.59 MB/s         192 B/op          3 allocs/op
   ```
   
   :smile: Though it looks like I should make a card to look into similarly 
speeding up the unaligned cases (by way of making the bitmap word reader and 
writer faster).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to