zeroshade commented on code in PR #13768: URL: https://github.com/apache/arrow/pull/13768#discussion_r936765696
########## go/arrow/bitutil/bitmaps_test.go: ########## @@ -356,3 +358,119 @@ func BenchmarkBitmapReader(b *testing.B) { } }) } + +type bitmapOp struct { + noAlloc func(left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) + alloc func(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer +} + +type BitmapOpSuite struct { + suite.Suite +} + +func (s *BitmapOpSuite) testAligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { + var ( + left, right []byte + out *memory.Buffer + length int64 + ) + for _, lOffset := range []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} { + s.Run(fmt.Sprintf("left offset %d", lOffset), func() { + left = bitmapFromSlice(leftBits, int(lOffset)) + length = int64(len(leftBits)) + for _, rOffset := range []int64{lOffset, lOffset + 8, lOffset + 40} { + s.Run(fmt.Sprintf("right offset %d", rOffset), func() { + right = bitmapFromSlice(rightBits, int(rOffset)) + for _, outOffset := range []int64{lOffset, lOffset + 16, lOffset + 24} { + s.Run(fmt.Sprintf("out offset %d", outOffset), func() { + out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) + rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + + memory.Set(out.Bytes(), 0x00) + op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) + rdr = bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + }) + } + }) + } + }) + } +} + +func (s *BitmapOpSuite) testUnaligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { + var ( + left, right []byte + out *memory.Buffer + length int64 + offsets = []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} + ) + + for _, lOffset := range offsets { + s.Run(fmt.Sprintf("left offset %d", lOffset), func() { + left = bitmapFromSlice(leftBits, int(lOffset)) + length = int64(len(leftBits)) + for _, rOffset := range offsets { + s.Run(fmt.Sprintf("right offset %d", rOffset), func() { + right = bitmapFromSlice(rightBits, int(rOffset)) + for _, outOffset := range offsets { + s.Run(fmt.Sprintf("out offset %d", outOffset), func() { + s.Run("alloc", func() { + out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) + rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + }) + s.Run("noalloc", func() { + memory.Set(out.Bytes(), 0x00) + op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) + rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + }) + }) + } + }) + } + }) + } +} + +func (s *BitmapOpSuite) TestBitmapAnd() { + op := bitmapOp{ + noAlloc: bitutil.BitmapAnd, + alloc: bitutil.BitmapAndAlloc, + } + + leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} + rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} + resultBits := []bool{false, false, true, false, false, false, false, false, false, true, false, false, false, false} Review Comment: So, I've been looking at the C++ implementation for a bit which I sourced this from and it looks like there's a bug I've inherited (correct me if i'm wrong please). Basically, if the bitmaps passed in to `BitmapAnd` and `BitmapOr` (in util/bitmap_ops.h/cc) if the offsets are non-zero (but identical, so it goes to the aligned case) but the passed in length is 0, it's still going to perform the bit-wise operation on the first byte of the bitmaps because it counts the number of bytes to operate on as `BytesForBits(length + offset % 8)` which would be 1 for any non-byte-aligned offset even with length being 0. This also exposes an issue (intentional or not, either way it's not documented) that the in the Aligned case, the `BitmapAnd` and `BitmapOr` functions will clobber preceding / trailing bits in the first/last bytes for non-byte-aligned offsets. In the un-aligned case, by using the BitmapWordWriter, those preceding and trailing bits get maintained because the writer utilizes masks to protect them. I'm gonna address these issues in this implementation, but I figured I'd mention them in case there's something in the C++ implementation that I'm missing or it exhibits these problems. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org