zeroshade commented on code in PR #13768:
URL: https://github.com/apache/arrow/pull/13768#discussion_r936765696


##########
go/arrow/bitutil/bitmaps_test.go:
##########
@@ -356,3 +358,119 @@ func BenchmarkBitmapReader(b *testing.B) {
                }
        })
 }
+
+type bitmapOp struct {
+       noAlloc func(left, right []byte, lOffset, rOffset int64, out []byte, 
outOffset, length int64)
+       alloc   func(mem memory.Allocator, left, right []byte, lOffset, rOffset 
int64, length, outOffset int64) *memory.Buffer
+}
+
+type BitmapOpSuite struct {
+       suite.Suite
+}
+
+func (s *BitmapOpSuite) testAligned(op bitmapOp, leftBits, rightBits []int, 
resultBits []bool) {
+       var (
+               left, right []byte
+               out         *memory.Buffer
+               length      int64
+       )
+       for _, lOffset := range []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 
65536} {
+               s.Run(fmt.Sprintf("left offset %d", lOffset), func() {
+                       left = bitmapFromSlice(leftBits, int(lOffset))
+                       length = int64(len(leftBits))
+                       for _, rOffset := range []int64{lOffset, lOffset + 8, 
lOffset + 40} {
+                               s.Run(fmt.Sprintf("right offset %d", rOffset), 
func() {
+                                       right = bitmapFromSlice(rightBits, 
int(rOffset))
+                                       for _, outOffset := range 
[]int64{lOffset, lOffset + 16, lOffset + 24} {
+                                               s.Run(fmt.Sprintf("out offset 
%d", outOffset), func() {
+                                                       out = 
op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, 
outOffset)
+                                                       rdr := 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                       assertReaderVals(s.T(), 
rdr, resultBits)
+
+                                                       memory.Set(out.Bytes(), 
0x00)
+                                                       op.noAlloc(left, right, 
lOffset, rOffset, out.Bytes(), outOffset, length)
+                                                       rdr = 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                       assertReaderVals(s.T(), 
rdr, resultBits)
+                                               })
+                                       }
+                               })
+                       }
+               })
+       }
+}
+
+func (s *BitmapOpSuite) testUnaligned(op bitmapOp, leftBits, rightBits []int, 
resultBits []bool) {
+       var (
+               left, right []byte
+               out         *memory.Buffer
+               length      int64
+               offsets     = []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 
65536}
+       )
+
+       for _, lOffset := range offsets {
+               s.Run(fmt.Sprintf("left offset %d", lOffset), func() {
+                       left = bitmapFromSlice(leftBits, int(lOffset))
+                       length = int64(len(leftBits))
+                       for _, rOffset := range offsets {
+                               s.Run(fmt.Sprintf("right offset %d", rOffset), 
func() {
+                                       right = bitmapFromSlice(rightBits, 
int(rOffset))
+                                       for _, outOffset := range offsets {
+                                               s.Run(fmt.Sprintf("out offset 
%d", outOffset), func() {
+                                                       s.Run("alloc", func() {
+                                                               out = 
op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, 
outOffset)
+                                                               rdr := 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                               
assertReaderVals(s.T(), rdr, resultBits)
+                                                       })
+                                                       s.Run("noalloc", func() 
{
+                                                               
memory.Set(out.Bytes(), 0x00)
+                                                               
op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length)
+                                                               rdr := 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                               
assertReaderVals(s.T(), rdr, resultBits)
+                                                       })
+                                               })
+                                       }
+                               })
+                       }
+               })
+       }
+}
+
+func (s *BitmapOpSuite) TestBitmapAnd() {
+       op := bitmapOp{
+               noAlloc: bitutil.BitmapAnd,
+               alloc:   bitutil.BitmapAndAlloc,
+       }
+
+       leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}
+       rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0}
+       resultBits := []bool{false, false, true, false, false, false, false, 
false, false, true, false, false, false, false}

Review Comment:
   So, I've been looking at the C++ implementation for a bit which I sourced 
this from and it looks like there's a bug I've inherited (correct me if i'm 
wrong please).
   
   Basically, if the bitmaps passed in to `BitmapAnd` and `BitmapOr` (in 
util/bitmap_ops.h/cc) if the offsets are non-zero (but identical, so it goes to 
the aligned case) but the passed in length is 0, it's still going to perform 
the bit-wise operation on the first byte of the bitmaps because it counts the 
number of bytes to operate on as `BytesForBits(length + offset % 8)` which 
would be 1 for any non-byte-aligned offset even with length being 0. This also 
exposes an issue (intentional or not, either way it's not documented) that the 
in the Aligned case, the `BitmapAnd` and `BitmapOr` functions will clobber 
preceding / trailing bits in the first/last bytes for non-byte-aligned offsets. 
   
   In the un-aligned case, by using the BitmapWordWriter, those preceding and 
trailing bits get maintained because the writer utilizes masks to protect them. 
I'm gonna address these issues in this implementation, but I figured I'd 
mention them in case there's something in the C++ implementation that I'm 
missing or it exhibits these problems. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to