Felix-Gong commented on code in PR #3312:
URL: https://github.com/apache/brpc/pull/3312#discussion_r3330346641
##########
src/butil/crc32c.cc:
##########
@@ -421,7 +421,188 @@ uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
return static_cast<uint32_t>(l ^ 0xffffffffu);
}
-// Detect if SS42 or not.
+#if defined(__riscv) && (__riscv_xlen == 64) && defined(__riscv_zbc)
+#include <stdio.h>
+
+// RISC-V Zbc carry-less multiplication inline helpers
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+ uint64_t result;
+ __asm__ volatile ("clmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
+ return result;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+ uint64_t result;
+ __asm__ volatile ("clmulh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
+ return result;
+}
+
+// Bitwise CRC32C fallback for small chunks
+static inline uint32_t rv_crc32c_bitwise(uint32_t crc, const uint8_t* buf,
+ size_t len) {
+ uint32_t c = crc;
+ for (size_t i = 0; i < len; ++i) {
+ c ^= buf[i];
+ for (int k = 0; k < 8; ++k) {
+ c = (c >> 1) ^ ((c & 1) ? 0x82F63B78U : 0);
+ }
+ }
+ return c;
+}
+
+// Fold a 128-bit CRC state (lo:hi) with fold constants and XOR in new data
+static inline void rv_fold_pair_xor_data(uint64_t* lo, uint64_t* hi,
+ uint64_t k0, uint64_t k1,
+ uint64_t d0, uint64_t d1) {
+ uint64_t l = rv_clmul(*lo, k0) ^ rv_clmul(*hi, k1);
+ uint64_t h = rv_clmulh(*lo, k0) ^ rv_clmulh(*hi, k1);
+ *lo = l ^ d0;
+ *hi = h ^ d1;
+}
+
+// Fold a 128-bit CRC state with fold constants and XOR in another state
+static inline void rv_fold_pair_xor_state(uint64_t* lo, uint64_t* hi,
+ uint64_t k0, uint64_t k1,
+ uint64_t s0, uint64_t s1) {
+ uint64_t l = rv_clmul(*lo, k0) ^ rv_clmul(*hi, k1);
+ uint64_t h = rv_clmulh(*lo, k0) ^ rv_clmulh(*hi, k1);
+ *lo = l ^ s0;
+ *hi = h ^ s1;
+}
+
+// Folding constants for CRC32C (Castagnoli polynomial 0x1EDC6F41)
+// x^(64*i+64) mod P(x) for i=1..4, in bit-reflected form
+static const uint64_t crc32c_fold_const[4] __attribute__((aligned(16))) = {
+ 0x00000000740eef02ULL, // k1: fold 512->256
+ 0x000000009e4addf8ULL, // k2: fold 512->256
+ 0x00000000f20c0dfeULL, // k3: fold 256->128
+ 0x00000000493c7d27ULL // k4: fold 256->128
+};
+
+// Barrett reduction constants for CRC32C finalization
+#define RV_CRC32C_CONST_0 0x00000000dd45aab8ULL // x^64 mod P
+#define RV_CRC32C_CONST_1 0x00000000493c7d27ULL // x^96 mod P
+#define RV_CRC32C_CONST_QUO 0x0000000dea713f1ULL // floor(x^64 / P)
+#define RV_CRC32C_CONST_POLY 0x0000000105ec76f1ULL // P(x) true LE full
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+
+// Hardware-accelerated CRC32C using RISC-V Zbc carry-less multiplication.
+// Processes data in 64-byte chunks with 128-bit folding, then Barrett reduces.
+static uint32_t rv_crc32c_clmul(uint32_t crc, const char* buf, size_t len) {
+ const uint8_t* p = reinterpret_cast<const uint8_t*>(buf);
+ size_t n = len;
+
+ // Small data: use bitwise fallback
+ if (n < 64) {
+ return rv_crc32c_bitwise(crc, p, n);
Review Comment:
Thanks for catching this. The missing `^ 0xFFFFFFFF` conversion is now fixed
and committed: the XOR is applied at function entry and on every return path.
Verified on a RISC-V SG2044 (Zbc): 24 test cases pass, and the results match
the table-based implementation.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]