On Mon, Jan 12, 2026 at 10:40:59AM -0500, [email protected] wrote: > From: Scott Mitchell <[email protected]> > > Modify RTE_PTR_ADD and RTE_PTR_SUB to use char* pointer arithmetic > instead of uintptr_t casts. Benefits of this approach: > - API compatibility: works for both integer and pointer inputs > - Retains simple macros: no pragmas, no _Generic > - Enables compiler optimizations: Clang now can unroll and vectorize > pointer loops, and GCC still can apply these optimizations. > > Example use case which benefits is __rte_raw_cksum. Performance > results from cksum_perf_autotest on Intel Xeon (Cascade Lake, > AVX-512) built with Clang 18.1 (TSC cycles/byte): > Block size Before After Improvement > 100 0.40 0.24 ~40% > 1500 0.50 0.06 ~8x > 9000 0.49 0.06 ~8x > > Signed-off-by: Scott Mitchell <[email protected]> > ---
The macro implementation LGTM Acked-by: Bruce Richardson <[email protected]> Couple of comments on the unit tests inline below. Most are only suggestions, so feel free to ignore if you think they aren't worth considering. > app/test/meson.build | 1 + > app/test/test_ptr_add_sub.c | 190 +++++++++++++++++++++++++++++++++++ > lib/eal/include/rte_common.h | 4 +- > 3 files changed, 193 insertions(+), 2 deletions(-) > create mode 100644 app/test/test_ptr_add_sub.c > > diff --git a/app/test/meson.build b/app/test/meson.build > index efec42a6bf..80a19d65ba 100644 > --- a/app/test/meson.build > +++ b/app/test/meson.build > @@ -152,6 +152,7 @@ source_file_deps = { > 'test_power_intel_uncore.c': ['power', 'power_intel_uncore'], > 'test_power_kvm_vm.c': ['power', 'power_kvm_vm'], > 'test_prefetch.c': [], > + 'test_ptr_add_sub.c': [], > 'test_ptr_compress.c': ['ptr_compress'], > 'test_rand_perf.c': [], > 'test_rawdev.c': ['rawdev', 'bus_vdev', 'raw_skeleton'], > diff --git a/app/test/test_ptr_add_sub.c b/app/test/test_ptr_add_sub.c > new file mode 100644 > index 0000000000..ef449ca9af > --- /dev/null > +++ b/app/test/test_ptr_add_sub.c > @@ -0,0 +1,190 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2026 Apple Inc. > + */ > + > +#include "test.h" > +#include <stdint.h> > +#include <stdbool.h> > + > +#include <rte_common.h> > + > +/* Test all C11 standard integer types */ > +static int > +test_ptr_add_sub_integer_types(void) > +{ > + unsigned long long ull = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ull, 100), 0x1064, > + "RTE_PTR_ADD failed for unsigned long long"); Given all the typecasting in the macros, I don't like the casting to uintptr_t when checking the return values. Maybe use (void *) on the value being compared-with instead. A suggestion: can you maybe put at the top: #define test_initval 0x1000 #define test_increment 100 #define test_retval ((void *)0x1064)) and use those everywhere rather than the hard-coded constants? > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_SUB(ull, 100), 0x1000 - 100, > + "RTE_PTR_SUB failed for unsigned long long"); > + > + long long ll = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ll, 100), 0x1064, > + "RTE_PTR_ADD failed for long long"); > + > + unsigned long ul = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ul, 100), 0x1064, > + "RTE_PTR_ADD failed for unsigned long"); > + > + long l = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(l, 100), 0x1064, > + "RTE_PTR_ADD failed for long"); > + > + unsigned int ui = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ui, 100), 0x1064, > + "RTE_PTR_ADD failed for unsigned int"); > + > + int i = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(i, 100), 0x1064, > + "RTE_PTR_ADD failed for int"); > + > + unsigned short us = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(us, 100), 0x1064, > + "RTE_PTR_ADD failed for unsigned short"); > + > + short s = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(s, 100), 0x1064, > + "RTE_PTR_ADD failed for short"); > + > + unsigned char uc = 100; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(uc, 50), 150, > + "RTE_PTR_ADD failed for unsigned char"); > + > + signed char sc = 100; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(sc, 50), 150, > + "RTE_PTR_ADD failed for signed char"); > + > + char c = 100; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(c, 50), 150, > + "RTE_PTR_ADD failed for char"); > + Maybe define a new set of macros for small sized ints? Or else just use 100 and 50 generally, rather than 0x1064? The test should be just as good, right? > + _Bool b = 1; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(b, 99), 100, > + "RTE_PTR_ADD failed for _Bool"); > + > + bool b2 = true; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(b2, 99), 100, > + "RTE_PTR_ADD failed for bool"); > + > + return 0; > +} > + > +/* Test pointer types including const correctness */ > +static int > +test_ptr_add_sub_pointer_types(void) > +{ > + char buffer[256]; > + void *result; > + > + /* Test void* and const void* */ > + void *vp = buffer; > + result = RTE_PTR_ADD(vp, 100); > + TEST_ASSERT_EQUAL(result, (void *)(buffer + 100), > + "RTE_PTR_ADD failed for void*"); > + result = RTE_PTR_SUB(vp, 50); > + TEST_ASSERT_EQUAL(result, (void *)(buffer - 50), > + "RTE_PTR_SUB failed for void*"); > + > + const void *cvp = buffer; > + result = RTE_PTR_ADD(cvp, 100); > + TEST_ASSERT_EQUAL(result, (void *)(buffer + 100), > + "RTE_PTR_ADD failed for const void*"); > + result = RTE_PTR_SUB(cvp, 50); > + TEST_ASSERT_EQUAL(result, (void *)(buffer - 50), > + "RTE_PTR_SUB failed for const void*"); > + > + /* Test char* and const char* */ > + char *cp = buffer; > + result = RTE_PTR_ADD(cp, 100); > + TEST_ASSERT_EQUAL(result, (void *)(buffer + 100), > + "RTE_PTR_ADD failed for char*"); > + result = RTE_PTR_SUB(cp, 50); > + TEST_ASSERT_EQUAL(result, (void *)(buffer - 50), > + "RTE_PTR_SUB failed for char*"); > + > + const char *ccp = buffer; > + result = RTE_PTR_ADD(ccp, 100); > + TEST_ASSERT_EQUAL(result, (void *)(buffer + 100), > + "RTE_PTR_ADD failed for const char*"); > + result = RTE_PTR_SUB(ccp, 50); > + TEST_ASSERT_EQUAL(result, (void *)(buffer - 50), > + "RTE_PTR_SUB failed for const char*"); > + > + /* Test uint32_t* and const uint32_t* */ > + uint32_t *u32p = (uint32_t *)buffer; > + result = RTE_PTR_ADD(u32p, 100); > + TEST_ASSERT_EQUAL(result, (void *)(buffer + 100), > + "RTE_PTR_ADD failed for uint32_t*"); > + result = RTE_PTR_SUB(u32p, 50); > + TEST_ASSERT_EQUAL(result, (void *)(buffer - 50), > + "RTE_PTR_SUB failed for uint32_t*"); > + > + const uint32_t *cu32p = (const uint32_t *)buffer; > + result = RTE_PTR_ADD(cu32p, 100); > + TEST_ASSERT_EQUAL(result, (void *)(buffer + 100), > + "RTE_PTR_ADD failed for const uint32_t*"); > + result = RTE_PTR_SUB(cu32p, 50); > + TEST_ASSERT_EQUAL(result, (void *)(buffer - 50), > + "RTE_PTR_SUB failed for const uint32_t*"); > + > + /* Verify assigning to const pointer works (adding const is safe) */ > + const void *result_const = RTE_PTR_ADD(cvp, 100); > + TEST_ASSERT_EQUAL(result_const, (const void *)(buffer + 100), > + "RTE_PTR_ADD failed when assigning to const void*"); > + > + return 0; > +} > + > +/* Test that typedefs resolve to native types correctly */ > +static int > +test_ptr_add_sub_typedefs(void) > +{ > + uint64_t u64 = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u64, 100), 0x1064, > + "RTE_PTR_ADD failed for uint64_t"); > + > + uint32_t u32 = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u32, 100), 0x1064, > + "RTE_PTR_ADD failed for uint32_t"); > + > + uint16_t u16 = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u16, 100), 0x1064, > + "RTE_PTR_ADD failed for uint16_t"); > + > + uint8_t u8 = 100; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u8, 50), 150, > + "RTE_PTR_ADD failed for uint8_t"); > + > + uintptr_t uptr = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(uptr, 100), 0x1064, > + "RTE_PTR_ADD failed for uintptr_t"); > + > + size_t sz = 0x1000; > + TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(sz, 100), 0x1064, > + "RTE_PTR_ADD failed for size_t"); > + > + return 0; > +} > + > +/* Main test function that runs all subtests */ > +static int > +test_ptr_add_sub(void) > +{ > + int ret; > + > + ret = test_ptr_add_sub_integer_types(); > + if (ret != 0) > + return ret; > + > + ret = test_ptr_add_sub_pointer_types(); > + if (ret != 0) > + return ret; > + > + ret = test_ptr_add_sub_typedefs(); > + if (ret != 0) > + return ret; > + > + return 0; > +} Very comprehensive set of tests, thanks. > + > +REGISTER_FAST_TEST(ptr_add_sub_autotest, true, true, test_ptr_add_sub); > diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h > index 9e7d84f929..54289871d2 100644 > --- a/lib/eal/include/rte_common.h > +++ b/lib/eal/include/rte_common.h > @@ -551,12 +551,12 @@ static void __attribute__((destructor(RTE_PRIO(prio)), > used)) func(void) > /** > * add a byte-value offset to a pointer > */ > -#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x))) > +#define RTE_PTR_ADD(ptr, x) ((void *)((char *)(uintptr_t)(ptr) + (x))) > > /** > * subtract a byte-value offset from a pointer > */ > -#define RTE_PTR_SUB(ptr, x) ((void *)((uintptr_t)(ptr) - (x))) > +#define RTE_PTR_SUB(ptr, x) ((void *)((char *)(uintptr_t)(ptr) - (x))) >

