https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114908

--- Comment #5 from Matthias Kretz (Vir) <mkretz at gcc dot gnu.org> ---
https://godbolt.org/z/P6cfbjT9f

#include <stdint.h>

/* Element (lane) type used by all of the SIMD wrappers in this test case. */
typedef uint64_t T;

/* GNU vector type of T that is 32 bytes wide, i.e. four 64-bit lanes. */
typedef T V [[gnu::vector_size(32)]];

/* Struct wrapper around one full vector V. */
typedef struct simd4 {
    V data;
} simd4;

/* Struct wrapper around a single scalar element T. */
typedef struct simd1 {
    T data;
} simd1;

/* Pair of a vector wrapper (a) and a single-element wrapper (b). */
typedef struct tup3_1 {
    simd4 a;
    simd1 b;
} tup3_1;

simd1 load1(const T* ptr) {
    simd1 ret = {ptr[0]};
    return ret;
}

/*
 * Build a simd4 from the three elements starting at `ptr`.
 * The result starts out zeroed and only the first three elements' worth of
 * bytes are overwritten, so the remaining bytes stay zero.
 */
simd4 load3(const T* ptr) {
    enum { kLoaded = 3 };  /* number of elements copied from ptr */
    simd4 out = {};
    __builtin_memcpy(&out, ptr, kLoaded * sizeof *ptr);
    return out;
}

/*
 * Split `x` into two parts: `.a` is load3() of the first three elements,
 * `.b` is load1() of the fourth element.
 */
tup3_1 split3_1(simd4 x) {
    /* View the by-value copy of x as an array of its elements. */
    const T* lanes = (const T*)&x;
    tup3_1 parts;
    parts.a = load3(lanes);
    parts.b = load1(lanes + 3);
    return parts;
}

/*
 * Concatenate `a` with the first three elements of `b`:
 * the first sizeof(T) bytes of the result come from `a`, the following
 * 3 * sizeof(T) bytes come from the start of `b`.
 */
simd4 concat1_3(simd1 a, simd4 b) {
    simd4 joined = {};
    unsigned char* dst = (unsigned char*)&joined;

    /* First element: the single value carried by a. */
    __builtin_memcpy(dst, &a, sizeof a.data);
    /* Next three elements: the leading bytes of b. */
    __builtin_memcpy(dst + sizeof a.data, &b, 3 * sizeof a.data);
    return joined;
}

/*
 * Take the `.a` part of split3_1(data) and prepend a zero-initialized
 * simd1 to it via concat1_3().
 */
simd4 perm(simd4 data) {
    const simd4 low3 = split3_1(data).a;
    return concat1_3((simd1){0}, low3);
}

Reply via email to