felipecrv commented on code in PR #41690:
URL: https://github.com/apache/arrow/pull/41690#discussion_r1603723000
##########
cpp/src/arrow/util/bit_util.h:
##########
@@ -299,22 +307,22 @@ static constexpr bool GetBit(const uint8_t* bits,
uint64_t i) {
// Gets the i-th bit from a byte. Should only be used with i <= 7.
static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
- return byte & kBitmask[i];
+ return byte & GetBitMask(i);
}
static inline void ClearBit(uint8_t* bits, int64_t i) {
- bits[i / 8] &= kFlippedBitmask[i % 8];
+ bits[i / 8] &= GetFlippedBitMask(i % 8);
}
-static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |=
kBitmask[i % 8]; }
+static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |=
GetBitMask(i % 8); }
Review Comment:
One advantage that the `kBitmask` implementation had over this one is that
memory access with negative `i` is UB, so the compiler was assuming here that
`i >= 0` to come up with the bitmask.
We can bring that UB back (yes, UB is a Good Thing™️) by using
`ARROW_COMPILER_ASSUME(i >= 0)`. The generated assembly for `SetBit` with the
non-negative assumption is much shorter and doesn't need a conditional mov (or
`csel` in ARM) instruction.
```asm
SetBit2(unsigned char*, long): # @SetBit2(unsigned
char*, long)
mov rcx, rsi
lea rax, [rsi + 7]
test rsi, rsi
cmovns rax, rsi
mov edx, eax
and edx, 248
sub ecx, edx
mov edx, 1
shl edx, cl
sar rax, 3
or byte ptr [rdi + rax], dl
ret
SetBit2NNeg(unsigned char*, long): #
@SetBit2NNeg(unsigned char*, long)
mov ecx, esi
and cl, 7
mov al, 1
shl al, cl
shr rsi, 3
or byte ptr [rdi + rsi], al
ret
```
All the experiments on Godbolt -> https://godbolt.org/z/Ez974vE3d
##########
cpp/src/arrow/util/bit_util.h:
##########
@@ -281,10 +281,18 @@ static inline int Log2(uint64_t x) {
//
// Bitmask selecting the k-th bit in a byte
-static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
+// static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
+static constexpr uint8_t GetBitMask(uint8_t index) {
+ // DCHECK(index >= 0 && index <= 7);
+ return static_cast<uint8_t>(1) << index;
Review Comment:
The compiler will generate code that performs the `<<` on a 32-bit integer.
A more honest implementation (in the sense that it gives more freedom to the
compiler [1]):
```cpp
return static_cast<uint8_t>(0x1 << index)
// and
return static_cast<uint8_t>(~(0x1 << i))
```
And since indices in arrow are rarely `uint8_t`, I would keep the index type
unconstrained like this:
```cpp
template <typename T>
static constexpr uint8_t GetBitmask2(T index) {
return static_cast<uint8_t>(0x1 << index);
}
template <typename T>
static constexpr uint8_t GetFlippedBitmask2(T index) {
return static_cast<uint8_t>(~(0x1 << index));
}
```
[1] might matter more for `rustc` than `clang` because C/C++ compilers
already have a lot of freedom even when your code contains many type constraints
##########
cpp/src/arrow/util/bit_util.h:
##########
@@ -299,22 +307,22 @@ static constexpr bool GetBit(const uint8_t* bits,
uint64_t i) {
// Gets the i-th bit from a byte. Should only be used with i <= 7.
static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
- return byte & kBitmask[i];
+ return byte & GetBitMask(i);
}
static inline void ClearBit(uint8_t* bits, int64_t i) {
- bits[i / 8] &= kFlippedBitmask[i % 8];
+ bits[i / 8] &= GetFlippedBitMask(i % 8);
}
-static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |=
kBitmask[i % 8]; }
+static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |=
GetBitMask(i % 8); }
Review Comment:
This is OK because whatever is defined as the expected behavior for negative
`i` in the C++ standard, is bogus in the context of Arrow and `SetBit` since a
negative `i` is already poison.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]