[Bug tree-optimization/100171] autovectorizer

2021-08-16 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100171

--- Comment #4 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
testcase:
#include <array>
#include <cstdint>

template  class foo
{
public:
using array_type = std::array;

array_type
value;

inline constexpr foo&operator+=(const foo& arg)noexcept
{
for (size_t i=0; i, 1>;
void exe_self_1d(ARR_1D& out, const ARR_1D& arg) noexcept {
exe_self(out, arg); }
void exe_1d(ARR_1D& out, const ARR_1D& arg1, const ARR_1D& arg2) noexcept {
exe(out, arg1, arg2); }

using ARR_2D = std::array, 2>;
void exe_self_2d(ARR_2D& out, const ARR_2D& arg) noexcept {
exe_self(out, arg); }
void exe_2d(ARR_2D& out, const ARR_2D& arg1, const ARR_2D& arg2) noexcept {
exe(out, arg1, arg2); }

using ARR_4D = std::array, 4>;
void exe_self_4d(ARR_4D& out, const ARR_4D& arg) noexcept {
exe_self(out, arg); }
void exe_4d(ARR_4D& out, const ARR_4D& arg1, const ARR_4D& arg2) noexcept {
exe(out, arg1, arg2); }


//  float32
using ARR_1F = std::array, 1>;
void exe_self_1f(ARR_1F& out, const ARR_1F& arg)  noexcept {
exe_self(out, arg);  }
void exe_1f(ARR_1F& out, const ARR_1F& arg1, const ARR_1F& arg2)  noexcept {
exe(out, arg1, arg2); }

using ARR_2F = std::array, 2>;
void exe_self_2f(ARR_2F& out, const ARR_2F& arg) noexcept {
exe_self(out, arg); }
void exe_2f(ARR_2F& out, const ARR_2F& arg1, const ARR_2F& arg2) noexcept {
exe(out, arg1, arg2); }

using ARR_4F = std::array, 4>;
void exe_self_4f(ARR_4F& out, const ARR_4F& arg) noexcept {
exe_self(out, arg); }
void exe_4f(ARR_4F& out, const ARR_4F& arg1, const ARR_4F& arg2) noexcept {
exe(out, arg1, arg2); }


//  int64
using ARR_1i64 = std::array, 1>;
void exe_self_1i64(ARR_1i64& out, const ARR_1i64& arg)  
noexcept { exe_self(out, arg);  }
void exe_1i64(ARR_1i64& out, const ARR_1i64& arg1, const ARR_1i64& arg2)
noexcept { exe(out, arg1, arg2); }

using ARR_2i64 = std::array, 2>;
void exe_self_2i64(ARR_2i64& out, const ARR_2i64& arg)  
noexcept { exe_self(out, arg); }
void exe_2i64(ARR_2i64& out, const ARR_2i64& arg1, const ARR_2i64& arg2)
noexcept { exe(out, arg1, arg2); }

using ARR_4i64 = std::array, 4>;
void exe_self_4i64(ARR_4i64& out, const ARR_4i64& arg)  
noexcept { exe_self(out, arg); }
void exe_4i64(ARR_4i64& out, const ARR_4i64& arg1, const ARR_4i64& arg2)
noexcept { exe(out, arg1, arg2); }


//  int32
using ARR_1i32 = std::array, 1>;
void exe_self_1i32(ARR_1i32& out, const ARR_1i32& arg)  
noexcept { exe_self(out, arg);  }
void exe_1i32(ARR_1i32& out, const ARR_1i32& arg1, const ARR_1i32& arg2)
noexcept { exe(out, arg1, arg2); }

using ARR_2i32 = std::array, 2>;
void exe_self_2i32(ARR_2i32& out, const ARR_2i32& arg)  
noexcept { exe_self(out, arg); }
void exe_2i32(ARR_2i32& out, const ARR_2i32& arg1, const ARR_2i32& arg2)
noexcept { exe(out, arg1, arg2); }

using ARR_4i32 = std::array, 4>;
void exe_self_4i32(ARR_4i32& out, const ARR_4i32& arg)  
noexcept { exe_self(out, arg); }
void exe_4i32(ARR_4i32& out, const ARR_4i32& arg1, const ARR_4i32& arg2)
noexcept { exe(out, arg1, arg2); }


//  int16
using ARR_1i16 = std::array, 1>;
void exe_self_1i16(ARR_1i16& out, const ARR_1i16& arg)  
noexcept { exe_self(out, arg);  }
void exe_1i16(ARR_1i16& out, const ARR_1i16& arg1, const ARR_1i16& arg2)
noexcept { exe(out, arg1, arg2); }

using ARR_2i16 = std::array, 2>;
void exe_self_2i16(ARR_2i16& out, const ARR_2i16& arg)  
noexcept { exe_self(out, arg); }
void exe_2i16(ARR_2i16& out, const ARR_2i16& arg1, const ARR_2i16& arg2)
noexcept { exe(out, arg1, arg2); }

using ARR_4i16 = std::array, 4>;
void exe_self_4i16(ARR_4i16& out, const ARR_4i16& arg)  
noexcept { exe_self(out, arg); }
void exe_4i16(ARR_4i16& out, const ARR_4i16& arg1, const ARR_4i16& arg2)
noexcept { exe(out, arg1, arg2); }


//  int8
using ARR_1i8 = std::array, 1>;
void exe_self_1i8(ARR_1i8& out, const ARR_1i8& arg)  noexcept {
exe_self(out, arg);  }
void exe_1i8(ARR_1i8& out, const ARR_1i8& arg1, const ARR_1i8& arg2) noexcept {
exe(out, arg1, arg2); }

using ARR_2i8 = std::array, 2>;
void exe_self_2i8(ARR_2i8& out, const ARR_2i8& arg)  noexcept {
exe_self(out, arg); }
void exe_2i8(ARR_2i8& out, const ARR_2i8& arg1, const ARR_2i8& arg2) noexcept {
exe(out, arg1, arg2); }

using ARR_4i8 = std::array, 4>;
void exe_self_4i8(ARR_4i8& out, const ARR_4i8& arg)  noexcept {
exe_self(out, arg); }
void exe_4i8(ARR_4i8& out, const ARR_4i8& arg1, const ARR_4i8& arg2) noexcept {
exe(out, arg1, arg2); }

[Bug tree-optimization/100171] autovectorizer

2021-04-21 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100171

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
Compared to the non-self case where we see

  <bb 2> [local count: 357878152]:
  _19 = MEM  [(const value_type &)arg1_3(D)][0];
  _20 = MEM  [(const value_type &)arg2_4(D)][0];
  _21 = _19 + _20;
  _26 = MEM  [(const value_type &)arg1_3(D)][1];
  _27 = MEM  [(const value_type &)arg2_4(D)][1];
  _28 = _26 + _27;
  res ={v} {CLOBBER};
  MEM[(struct value_type *)out_2(D)][0].value._M_elems[0] = _21;
  MEM[(struct value_type *)out_2(D)][0].value._M_elems[1] = _28;
  return;

here intermediate optimizations have elided 'res'.

[Bug tree-optimization/100171] autovectorizer

2021-04-20 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100171

Richard Biener <rguenth at gcc dot gnu.org> changed:

   What|Removed |Added

 CC||hubicka at gcc dot gnu.org,
   ||rguenth at gcc dot gnu.org

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
Well, the issue is that we end up with (for the simplest case):

  <bb 2> [local count: 357878152]:
  _15 = MEM  [(const value_type &)arg_3(D)][0];
  _16 = MEM  [(value_type &)out_2(D)][0];
  _17 = _15 + _16;
  MEM  [(value_type &)out_2(D)][0] = _17;
  _22 = MEM  [(const value_type &)arg_3(D)][1];
  _23 = MEM  [(value_type &)out_2(D)][1];
  _24 = _22 + _23;
  MEM  [(value_type &)out_2(D)][1] = _24;
  return;

and the first store into out[0] can end up writing to arg[1].  I don't see
what we can easily do here.  Path based disambiguation could maybe argue
that partial overlaps of value_type are not allowed.

[Bug tree-optimization/100171] autovectorizer

2021-04-20 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100171

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

   What|Removed |Added

Version|unknown |11.0
   Severity|normal  |enhancement
   Keywords||alias
  Component|c++ |tree-optimization

--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
There is an aliasing issue with the += case.
I noticed that even clang does not auto-vectorize the exe_self_* cases.