https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123672

            Bug ID: 123672
           Summary: __builtin_shufflevector() produces incorrect code with
                    -ftree-forwprop
           Product: gcc
           Version: 15.2.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: andrewpbelt at gmail dot com
  Target Milestone: ---

GCC 15.x miscompiles chained __builtin_shufflevector() operations at -O1 and
above, i.e. when -ftree-forwprop is enabled.
The code sample below is reduced from an FFT I was debugging; it is the smallest
reproducer I could find that still produces the incorrect result.

$ gcc -O1 bug.c && ./a.out # "0 0 0 0" (incorrect)
$ gcc -O1 -fno-tree-forwprop bug.c && ./a.out # "0 0 1 -1" (correct)

Tested by building GCC from source on Arch Linux x86_64 Linux 6.18.5.
GCC 15.2.1: incorrect
GCC 15.2.0: incorrect
GCC 15.1.0: incorrect
GCC 14.3.0: correct
clang 21.1.6: correct

Godbolt's copy of x86_64 GCC 15.2 produces the correct output, suggesting that
this might be an issue with the Arch Linux environment.



#include <stdio.h>

// 16-byte vector of int (GCC vector extension). The reproducer treats it as
// four lanes, i.e. it assumes a 4-byte int as on the x86_64 target under test.
typedef int V __attribute__((vector_size(16)));

// Pair of vectors returned by f(): `a` and `b` are computed by identical
// expression chains and are therefore expected to hold the same lanes.
struct Result { V a, b; };

// Reduced reproducer: builds the same vector twice (A and B) through two
// textually identical chains of __builtin_shufflevector() calls and returns
// both. A correct compiler must return A == B; the report states that affected
// GCC builds return B = {0, 0, 0, 0} at -O1 while A is still correct.
//
// Shuffle indices select lanes from the concatenation of the two operands:
// 0..3 pick from the first operand, 4..7 from the second.
//
// The lane values in the comments below are for the input used by main().
// Do not "simplify" the duplicated statements: the duplication is part of
// what the reproducer exercises.
struct Result f(V i) {
    // i = {0, 1, 0, -1}

    // Broadcast lane 0 three times, then lane 1.
    V X = __builtin_shufflevector(i, i, 0, 0, 0, 1);
    // X = {0, 0, 0, 1}

    // Even lanes (0, 2) and odd lanes (1, 3) of X, each repeated twice.
    V e1 = __builtin_shufflevector(X, X, 0, 2, 0, 2);
    V o1 = __builtin_shufflevector(X, X, 1, 3, 1, 3);
    // e1 = {0, 0, 0, 0}
    // o1 = {0, 1, 0, 1}

    // Identical to e1 and o1
    V e2 = __builtin_shufflevector(X, X, 0, 2, 0, 2);
    V o2 = __builtin_shufflevector(X, X, 1, 3, 1, 3);
    // e2 = {0, 0, 0, 0}
    // o2 = {0, 1, 0, 1}

    // Interleave the first two lanes of the sum with the first two lanes of
    // the difference: {sum[0], diff[0], sum[1], diff[1]}.
    // e1 + o1 = {0, 1, 0, 1}
    // e1 - o1 = {0, -1, 0, -1}
    V A = __builtin_shufflevector(e1 + o1, e1 - o1, 0, 4, 1, 5);
    // A = {0, 0, 1, -1}

    // Identical to A
    // e2 + o2 = {0, 1, 0, 1}
    // e2 - o2 = {0, -1, 0, -1}
    V B = __builtin_shufflevector(e2 + o2, e2 - o2, 0, 4, 1, 5);
    // B = {0, 0, 1, -1}
    // BUG: GCC computes B = {0, 0, 0, 0}

    return (struct Result){A, B};
}

// Driver: runs f() on {0, 1, 0, -1} and prints the four lanes of each result
// vector on its own line. Both lines should read "0 0 1 -1".
int main() {
    struct Result r = f((V){0, 1, 0, -1});
    // First line (r.a): printed correctly as "0 0 1 -1" even on affected builds.
    printf("%d %d %d %d\n", r.a[0], r.a[1], r.a[2], r.a[3]);
    // Second line (r.b): affected builds print "0 0 0 0"; expected "0 0 1 -1".
    printf("%d %d %d %d\n", r.b[0], r.b[1], r.b[2], r.b[3]);
}

Reply via email to