> > +/* On 32 bit platform, need to use atomic to avoid load/store tearing */
> > +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;  
> As shown by Godbolt experiments discussed in a previous thread [2], 
> non-tearing 64 bit counters can be implemented without using atomic 
> instructions on all 32 bit architectures supported by DPDK. So we should use 
> the counter/offset design pattern for RTE_ARCH_32 too.
> [2]: 
> https://inbox.dpdk.org/dev/98cbd80474fa8b44bf855df32c47dc35e9f...@smartserver.smartshare.dk/

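This code, built with -O3 and -m32 on godbolt, shows the split problem: the
64-bit add is compiled to a separate add/adc pair on the two 32-bit halves,
and the 64-bit load to two 32-bit moves. Only the plain store is emitted as
a single 64-bit movq.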

#include <stdint.h>

typedef uint64_t rte_counter64_t;

rte_counter64_add(rte_counter64_t *counter, uint32_t val)
        *counter += val;
…       *counter = val;

        push    ebx
        mov     eax, DWORD PTR [esp+8]
        xor     ebx, ebx
        mov     ecx, DWORD PTR [esp+12]
        add     DWORD PTR [eax], ecx
        adc     DWORD PTR [eax+4], ebx
        pop     ebx

        mov     eax, DWORD PTR [esp+4]
        mov     edx, DWORD PTR [eax+4]
        mov     eax, DWORD PTR [eax]
        movq    xmm0, QWORD PTR [esp+8]
        mov     eax, DWORD PTR [esp+4]
        movq    QWORD PTR [eax], xmm0

