https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99638
Jan Hubicka <hubicka at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |jamborm at gcc dot gnu.org
Summary|s132 benchmarks of TSVC on |s132 and s281 benchmarks of
|zen3 benefits from -mno-fma |TSVC on zen3 benefits from
| |-mno-fma
--- Comment #1 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
s281 benchmark:
/* Scalar element type used by every floating-point array below. */
typedef float real_t;

/* Repetition count for the outer timing loop, and the 1-D / 2-D extents. */
#define iterations 1000000
#define LEN_1D 32000
#define LEN_2D 256

/*
 * Global working set. All of these are file-scope objects with no
 * initializer, so they start out zero-filled. Definition order is kept
 * exactly as in the original reproducer.
 */
real_t flat_2d_array[LEN_2D * LEN_2D];
real_t x[LEN_1D];

/* One-dimensional operands, LEN_1D elements each. */
real_t a[LEN_1D];
real_t b[LEN_1D];
real_t c[LEN_1D];
real_t d[LEN_1D];
real_t e[LEN_1D];

/* Square two-dimensional operands, LEN_2D x LEN_2D elements each. */
real_t aa[LEN_2D][LEN_2D];
real_t bb[LEN_2D][LEN_2D];
real_t cc[LEN_2D][LEN_2D];
real_t tt[LEN_2D][LEN_2D];

/* Integer array with one entry per 1-D element. */
int indx[LEN_1D];

/* Two pointers to real_t; only xx carries the restrict qualifier. */
real_t *__restrict__ xx;
real_t *yy;
// %2.5
// Reproducer driver for the s281 kernel: repeats one sweep over the global
// arrays a, b and c `iterations` times, calling dummy() after every sweep.
//
// NOTE(review): `void main()` is not one of the standard hosted signatures
// (`int main(void)` is); left untouched because this is a verbatim testcase.
void main()
{
// crossing thresholds
// index set splitting
// reverse data access
// This local scalar shadows the file-scope array `x[LEN_1D]` inside main.
real_t x;
// Outer loop: repeat the whole sweep `iterations` times.
for (int nl = 0; nl < iterations; nl++) {
// Inner loop: one forward pass over all LEN_1D elements.
for (int i = 0; i < LEN_1D; i++) {
// `a` is read from the mirrored index (LEN_1D-i-1) while it is written at i
// below, so once i passes the midpoint the value read here was stored
// earlier in this same sweep.
// The add of a product (b[i] * c[i]) is presumably the expression that gets
// contracted into an FMA in the slow build — TODO confirm against the
// generated assembly.
x = a[LEN_1D-i-1] + b[i] * c[i];
// Store x minus one into a[i] and the unmodified x into b[i].
a[i] = x-(real_t)1.0;
b[i] = x;
}
// NOTE(review): dummy() has no declaration or definition in this snippet.
// Presumably an external, opaque call that keeps the compiler from
// collapsing the repeated sweeps — verify against the full TSVC sources.
dummy();
}
}
With FMA enabled this testcase runs in 18s; without FMA (-mno-fma) it runs in 14s.