[
https://issues.apache.org/jira/browse/HADOOP-19724?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18037215#comment-18037215
]
ASF GitHub Bot commented on HADOOP-19724:
-----------------------------------------
steveloughran commented on code in PR #8031:
URL: https://github.com/apache/hadoop/pull/8031#discussion_r2511392823
##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
* limitations under the License.
*/
+#include <assert.h>
+#include <stddef.h> // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+ const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
Review Comment:
explain what it does with a comment. same for the methods below
##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
* limitations under the License.
*/
+#include <assert.h>
+#include <stddef.h> // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+ const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
Review Comment:
explain what these are...assumign they're all defined in the crc spec, say so
##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
* limitations under the License.
*/
+#include <assert.h>
+#include <stddef.h> // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+ const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+ uint64_t r;
+ __asm__ volatile(
+ ".option push\n\t"
+ ".option arch, +zbc\n\t"
+ "clmul %0, %1, %2\n\t"
+ ".option pop\n\t"
+ : "=r"(r)
+ : "r"(a), "r"(b));
+ return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+ uint64_t r;
+ __asm__ volatile(
+ ".option push\n\t"
+ ".option arch, +zbc\n\t"
+ "clmulh %0, %1, %2\n\t"
+ ".option pop\n\t"
+ : "=r"(r)
+ : "r"(a), "r"(b));
+ return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+ size_t len) {
+ uint32_t c = crc;
+ for (size_t i = 0; i < len; ++i) {
+ c ^= buf[i];
+ for (int k = 0; k < 8; ++k) {
+ uint32_t mask = -(int32_t)(c & 1);
+ c = (c >> 1) ^ (0xEDB88320U & mask); // reflected polynomial
+ }
+ }
+ return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+ size_t len) {
+ const uint8_t *p = buf;
+ size_t n = len;
+
+ if (n < 32) {
+ return rv_crc32_zlib_bitwise(crc, p, n);
+ }
+
+ uintptr_t mis = (uintptr_t)p & 0xF;
+ if (unlikely(mis)) {
+ size_t pre = 16 - mis;
+ if (pre > n) pre = n;
+ crc = rv_crc32_zlib_bitwise(crc, p, pre);
+ p += pre;
+ n -= pre;
+ }
+
+ uint64_t x0 = *(const uint64_t *)(const void *)(p + 0);
+ uint64_t x1 = *(const uint64_t *)(const void *)(p + 8);
+ x0 ^= (uint64_t)crc;
+ p += 16;
+ n -= 16;
+
+ const uint64_t C1 = RV_CRC32_CONST_R3;
+ const uint64_t C2 = RV_CRC32_CONST_R4;
+
+ while (likely(n >= 16)) {
+ uint64_t tL = rv_clmul(C2, x1);
+ uint64_t tH = rv_clmulh(C2, x1);
+ uint64_t yL = rv_clmul(C1, x0);
+ uint64_t yH = rv_clmulh(C1, x0);
+ x0 = yL ^ tL;
+ x1 = yH ^ tH;
+
+ uint64_t d0 = *(const uint64_t *)(const void *)(p + 0);
+ uint64_t d1 = *(const uint64_t *)(const void *)(p + 8);
+ x0 ^= d0;
+ x1 ^= d1;
+ p += 16;
+ n -= 16;
+ }
+
+ {
+ uint64_t tH = rv_clmulh(x0, C2);
+ uint64_t tL = rv_clmul(x0, C2);
+ x0 = x1 ^ tL;
+ x1 = tH;
+ }
+
+ uint64_t hi = x1;
+ uint64_t lo = x0;
+ uint64_t t2 = (lo >> 32) | (hi << 32);
+ lo &= RV_CRC32_MASK32;
+
+ lo = rv_clmul(RV_CRC32_CONST_R5, lo) ^ t2;
+ uint64_t tmp = lo;
+ lo &= RV_CRC32_MASK32;
+ lo = rv_clmul(lo, RV_CRC32_CONST_RU);
+ lo &= RV_CRC32_MASK32;
+ lo = rv_clmul(lo, RV_CRC32_POLY_TRUE_LE_FULL) ^ tmp;
+
+ uint32_t c = (uint32_t)(lo >> 32);
+
+ if (n) {
+ c = rv_crc32_zlib_bitwise(c, p, n);
+ }
+ return c;
+}
+
/**
- * RISC-V CRC32 hardware acceleration (placeholder)
+ * Pipelined version of hardware-accelerated CRC32 calculation using
+ * RISC-V Zbc carry-less multiply instructions.
*
- * Phase 1: provide a RISC-V-specific compilation unit that currently makes
- * no runtime changes and falls back to the generic software path in
- * bulk_crc32.c. Future work will add Zbc-based acceleration and runtime
- * dispatch.
+ * crc1, crc2, crc3 : Store initial checksum for each block before
+ * calling. When it returns, updated checksums are stored.
+ * p_buf : The base address of the data buffer. The buffer should be
+ * at least as big as block_size * num_blocks.
+ * block_size : The size of each block in bytes.
+ * num_blocks : The number of blocks to work on. Min = 1, Max = 3
Review Comment:
0 is taken too, just treated as a no-op. mention and that any other value
raises an assertion. which isn't going to be picked up, is it?
##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
* limitations under the License.
*/
+#include <assert.h>
+#include <stddef.h> // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+ const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+ uint64_t r;
+ __asm__ volatile(
+ ".option push\n\t"
+ ".option arch, +zbc\n\t"
+ "clmul %0, %1, %2\n\t"
+ ".option pop\n\t"
+ : "=r"(r)
+ : "r"(a), "r"(b));
+ return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+ uint64_t r;
+ __asm__ volatile(
+ ".option push\n\t"
+ ".option arch, +zbc\n\t"
+ "clmulh %0, %1, %2\n\t"
+ ".option pop\n\t"
+ : "=r"(r)
+ : "r"(a), "r"(b));
+ return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+ size_t len) {
+ uint32_t c = crc;
+ for (size_t i = 0; i < len; ++i) {
+ c ^= buf[i];
+ for (int k = 0; k < 8; ++k) {
+ uint32_t mask = -(int32_t)(c & 1);
+ c = (c >> 1) ^ (0xEDB88320U & mask); // reflected polynomial
+ }
+ }
+ return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+ size_t len) {
+ const uint8_t *p = buf;
+ size_t n = len;
+
+ if (n < 32) {
+ return rv_crc32_zlib_bitwise(crc, p, n);
+ }
+
+ uintptr_t mis = (uintptr_t)p & 0xF;
Review Comment:
comment that this is to handle misaligned data and that this is considered
unlikely
##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
* limitations under the License.
*/
+#include <assert.h>
+#include <stddef.h> // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+ const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+ uint64_t r;
+ __asm__ volatile(
+ ".option push\n\t"
+ ".option arch, +zbc\n\t"
+ "clmul %0, %1, %2\n\t"
+ ".option pop\n\t"
+ : "=r"(r)
+ : "r"(a), "r"(b));
+ return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+ uint64_t r;
+ __asm__ volatile(
+ ".option push\n\t"
+ ".option arch, +zbc\n\t"
+ "clmulh %0, %1, %2\n\t"
+ ".option pop\n\t"
+ : "=r"(r)
+ : "r"(a), "r"(b));
+ return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+ size_t len) {
+ uint32_t c = crc;
+ for (size_t i = 0; i < len; ++i) {
+ c ^= buf[i];
+ for (int k = 0; k < 8; ++k) {
+ uint32_t mask = -(int32_t)(c & 1);
+ c = (c >> 1) ^ (0xEDB88320U & mask); // reflected polynomial
+ }
+ }
+ return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+ size_t len) {
+ const uint8_t *p = buf;
+ size_t n = len;
+
+ if (n < 32) {
+ return rv_crc32_zlib_bitwise(crc, p, n);
+ }
+
+ uintptr_t mis = (uintptr_t)p & 0xF;
+ if (unlikely(mis)) {
+ size_t pre = 16 - mis;
+ if (pre > n) pre = n;
+ crc = rv_crc32_zlib_bitwise(crc, p, pre);
+ p += pre;
+ n -= pre;
+ }
+
+ uint64_t x0 = *(const uint64_t *)(const void *)(p + 0);
+ uint64_t x1 = *(const uint64_t *)(const void *)(p + 8);
+ x0 ^= (uint64_t)crc;
+ p += 16;
+ n -= 16;
+
+ const uint64_t C1 = RV_CRC32_CONST_R3;
+ const uint64_t C2 = RV_CRC32_CONST_R4;
+
+ while (likely(n >= 16)) {
Review Comment:
and explain this is going through 16 bytes of aligned data
> [RISC-V] Add rv bulk CRC32 (non-CRC32C) optimized path
> -------------------------------------------------------
>
> Key: HADOOP-19724
> URL: https://issues.apache.org/jira/browse/HADOOP-19724
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: hadoop-common
> Affects Versions: 3.5.0
> Reporter: Ptroc
> Priority: Major
> Labels: native, pull-request-available, risc-v
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]