Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-27 Thread Richard Biener
On Tue, Aug 27, 2019 at 9:01 AM Richard Sandiford
 wrote:
>
> Ilya Leoshkevich  writes:
> >> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich :
> >>
> >>> Am 26.08.2019 um 15:06 schrieb Richard Biener 
> >>> :
> >>>
> >>> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich  
> >>> wrote:
> 
> > Am 26.08.2019 um 10:49 schrieb Richard Biener 
> > :
> >
> > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich  
> > wrote:
> >>
> >>> Am 23.08.2019 um 13:24 schrieb Richard Biener 
> >>> :
> >>>
> >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
> >>>  wrote:
> 
>  Ilya Leoshkevich  writes:
> > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, 
> > machine_mode mode,
> > return 0;
> > }
> >
> > +/* can_vector_compare_p presents fake rtx binary operations to the 
> > the back-end
> > +   in order to determine its capabilities.  In order to avoid 
> > creating fake
> > +   operations on each call, values from previous calls are cached 
> > in a global
> > +   cached_binops hash_table.  It contains rtxes, which can be 
> > looked up using
> > +   binop_keys.  */
> > +
> > +struct binop_key {
> > +  enum rtx_code code;/* Operation code.  */
> > +  machine_mode value_mode;   /* Result mode. */
> > +  machine_mode cmp_op_mode;  /* Operand mode.*/
> > +};
> > +
> > +struct binop_hasher : pointer_hash_mark, 
> > ggc_cache_remove {
> > +  typedef rtx value_type;
> > +  typedef binop_key compare_type;
> > +
> > +  static hashval_t
> > +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> > cmp_op_mode)
> > +  {
> > +inchash::hash hstate (0);
> > +hstate.add_int (code);
> > +hstate.add_int (value_mode);
> > +hstate.add_int (cmp_op_mode);
> > +return hstate.end ();
> > +  }
> > +
> > +  static hashval_t
> > +  hash (const rtx )
> > +  {
> > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP 
> > (ref, 0)));
> > +  }
> > +
> > +  static bool
> > +  equal (const rtx , const binop_key )
> > +  {
> > +return (GET_CODE (ref1) == ref2.code)
> > +&& (GET_MODE (ref1) == ref2.value_mode)
> > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> > +  }
> > +};
> > +
> > +static GTY ((cache)) hash_table *cached_binops;
> > +
> > +static rtx
> > +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> > +   machine_mode cmp_op_mode)
> > +{
> > +  if (!cached_binops)
> > +cached_binops = hash_table::create_ggc (1024);
> > +  binop_key key = { code, value_mode, cmp_op_mode };
> > +  hashval_t hash = binop_hasher::hash (code, value_mode, 
> > cmp_op_mode);
> > +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, 
> > INSERT);
> > +  if (!*slot)
> > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx 
> > (cmp_op_mode),
> > + gen_reg_rtx (cmp_op_mode));
> > +  return *slot;
> > +}
> 
>  Sorry, I didn't mean anything this complicated.  I just meant that
>  we should have a single cached rtx that we can change via PUT_CODE 
>  and
>  PUT_MODE_RAW for each new query, rather than allocating a new rtx 
>  each
>  time.
> 
>  Something like:
> 
>  static GTY ((cache)) rtx cached_binop;
> 
>  rtx
>  get_cached_binop (machine_mode mode, rtx_code code, machine_mode 
>  op_mode)
>  {
>  if (cached_binop)
>  {
>    PUT_CODE (cached_binop, code);
>    PUT_MODE_RAW (cached_binop, mode);
>    PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>    PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>  }
>  else
>  {
>    rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>    rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>    cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>  }
>  return cached_binop;
>  }
> >>>
> >>> Hmm, maybe we need  auto_rtx (code) that constructs such
> >>> RTX on the stack instead of wasting a GC root (and causing
> >>> issues for future threading of GCC ;)).
> >>
> >> Do you mean something like this?
> >>
> >> union {
> >> char raw[rtx_code_size[code]];
> >> rtx rtx;
> >> } binop;
> >>
> >> Does this exist already (git grep 

Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-27 Thread Richard Sandiford
Ilya Leoshkevich  writes:
>> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich :
>> 
>>> Am 26.08.2019 um 15:06 schrieb Richard Biener :
>>> 
>>> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich  wrote:
 
> Am 26.08.2019 um 10:49 schrieb Richard Biener 
> :
> 
> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich  
> wrote:
>> 
>>> Am 23.08.2019 um 13:24 schrieb Richard Biener 
>>> :
>>> 
>>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
>>>  wrote:
 
 Ilya Leoshkevich  writes:
> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, 
> machine_mode mode,
> return 0;
> }
> 
> +/* can_vector_compare_p presents fake rtx binary operations to the 
> the back-end
> +   in order to determine its capabilities.  In order to avoid 
> creating fake
> +   operations on each call, values from previous calls are cached in 
> a global
> +   cached_binops hash_table.  It contains rtxes, which can be looked 
> up using
> +   binop_keys.  */
> +
> +struct binop_key {
> +  enum rtx_code code;/* Operation code.  */
> +  machine_mode value_mode;   /* Result mode. */
> +  machine_mode cmp_op_mode;  /* Operand mode.*/
> +};
> +
> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
> +  typedef rtx value_type;
> +  typedef binop_key compare_type;
> +
> +  static hashval_t
> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> cmp_op_mode)
> +  {
> +inchash::hash hstate (0);
> +hstate.add_int (code);
> +hstate.add_int (value_mode);
> +hstate.add_int (cmp_op_mode);
> +return hstate.end ();
> +  }
> +
> +  static hashval_t
> +  hash (const rtx )
> +  {
> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP 
> (ref, 0)));
> +  }
> +
> +  static bool
> +  equal (const rtx , const binop_key )
> +  {
> +return (GET_CODE (ref1) == ref2.code)
> +&& (GET_MODE (ref1) == ref2.value_mode)
> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> +  }
> +};
> +
> +static GTY ((cache)) hash_table *cached_binops;
> +
> +static rtx
> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> +   machine_mode cmp_op_mode)
> +{
> +  if (!cached_binops)
> +cached_binops = hash_table::create_ggc (1024);
> +  binop_key key = { code, value_mode, cmp_op_mode };
> +  hashval_t hash = binop_hasher::hash (code, value_mode, 
> cmp_op_mode);
> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
> +  if (!*slot)
> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx 
> (cmp_op_mode),
> + gen_reg_rtx (cmp_op_mode));
> +  return *slot;
> +}
 
 Sorry, I didn't mean anything this complicated.  I just meant that
 we should have a single cached rtx that we can change via PUT_CODE and
 PUT_MODE_RAW for each new query, rather than allocating a new rtx each
 time.
 
 Something like:
 
 static GTY ((cache)) rtx cached_binop;
 
 rtx
 get_cached_binop (machine_mode mode, rtx_code code, machine_mode 
 op_mode)
 {
 if (cached_binop)
 {
   PUT_CODE (cached_binop, code);
   PUT_MODE_RAW (cached_binop, mode);
   PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
   PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
 }
 else
 {
   rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
   rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
   cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
 }
 return cached_binop;
 }
>>> 
>>> Hmm, maybe we need  auto_rtx (code) that constructs such
>>> RTX on the stack instead of wasting a GC root (and causing
>>> issues for future threading of GCC ;)).
>> 
>> Do you mean something like this?
>> 
>> union {
>> char raw[rtx_code_size[code]];
>> rtx rtx;
>> } binop;
>> 
>> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
>> anything useful), or should I implement this?
> 
> It doesn't exist AFAIK, I thought about using alloca like
> 
> rtx tem;
> rtx_alloca (tem, PLUS);
> 
> and due to using alloca rtx_alloca has to be a macro like
> 
> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
> 

Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-26 Thread Ilya Leoshkevich



> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich :
> 
>> Am 26.08.2019 um 15:06 schrieb Richard Biener :
>> 
>> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich  wrote:
>>> 
 Am 26.08.2019 um 10:49 schrieb Richard Biener :
 
 On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich  
 wrote:
> 
>> Am 23.08.2019 um 13:24 schrieb Richard Biener 
>> :
>> 
>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
>>  wrote:
>>> 
>>> Ilya Leoshkevich  writes:
 @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode 
 mode,
 return 0;
 }
 
 +/* can_vector_compare_p presents fake rtx binary operations to the 
 the back-end
 +   in order to determine its capabilities.  In order to avoid 
 creating fake
 +   operations on each call, values from previous calls are cached in 
 a global
 +   cached_binops hash_table.  It contains rtxes, which can be looked 
 up using
 +   binop_keys.  */
 +
 +struct binop_key {
 +  enum rtx_code code;/* Operation code.  */
 +  machine_mode value_mode;   /* Result mode. */
 +  machine_mode cmp_op_mode;  /* Operand mode.*/
 +};
 +
 +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
 +  typedef rtx value_type;
 +  typedef binop_key compare_type;
 +
 +  static hashval_t
 +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
 cmp_op_mode)
 +  {
 +inchash::hash hstate (0);
 +hstate.add_int (code);
 +hstate.add_int (value_mode);
 +hstate.add_int (cmp_op_mode);
 +return hstate.end ();
 +  }
 +
 +  static hashval_t
 +  hash (const rtx )
 +  {
 +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 
 0)));
 +  }
 +
 +  static bool
 +  equal (const rtx , const binop_key )
 +  {
 +return (GET_CODE (ref1) == ref2.code)
 +&& (GET_MODE (ref1) == ref2.value_mode)
 +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
 +  }
 +};
 +
 +static GTY ((cache)) hash_table *cached_binops;
 +
 +static rtx
 +get_cached_binop (enum rtx_code code, machine_mode value_mode,
 +   machine_mode cmp_op_mode)
 +{
 +  if (!cached_binops)
 +cached_binops = hash_table::create_ggc (1024);
 +  binop_key key = { code, value_mode, cmp_op_mode };
 +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
 +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
 +  if (!*slot)
 +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx 
 (cmp_op_mode),
 + gen_reg_rtx (cmp_op_mode));
 +  return *slot;
 +}
>>> 
>>> Sorry, I didn't mean anything this complicated.  I just meant that
>>> we should have a single cached rtx that we can change via PUT_CODE and
>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
>>> time.
>>> 
>>> Something like:
>>> 
>>> static GTY ((cache)) rtx cached_binop;
>>> 
>>> rtx
>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode 
>>> op_mode)
>>> {
>>> if (cached_binop)
>>> {
>>>   PUT_CODE (cached_binop, code);
>>>   PUT_MODE_RAW (cached_binop, mode);
>>>   PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>>>   PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>>> }
>>> else
>>> {
>>>   rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>>>   rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>>>   cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>>> }
>>> return cached_binop;
>>> }
>> 
>> Hmm, maybe we need  auto_rtx (code) that constructs such
>> RTX on the stack instead of wasting a GC root (and causing
>> issues for future threading of GCC ;)).
> 
> Do you mean something like this?
> 
> union {
> char raw[rtx_code_size[code]];
> rtx rtx;
> } binop;
> 
> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
> anything useful), or should I implement this?
 
 It doesn't exist AFAIK, I thought about using alloca like
 
 rtx tem;
 rtx_alloca (tem, PLUS);
 
 and due to using alloca rtx_alloca has to be a macro like
 
 #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
 memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code);
 
 maybe C++ can help making this prettier but of course
 since we use alloca we have to avoid opening new scopes.
 
 

Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-26 Thread Ilya Leoshkevich
> Am 26.08.2019 um 15:06 schrieb Richard Biener :
> 
> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich  wrote:
>> 
>>> Am 26.08.2019 um 10:49 schrieb Richard Biener :
>>> 
>>> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich  wrote:
 
> Am 23.08.2019 um 13:24 schrieb Richard Biener 
> :
> 
> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
>  wrote:
>> 
>> Ilya Leoshkevich  writes:
>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode 
>>> mode,
>>> return 0;
>>> }
>>> 
>>> +/* can_vector_compare_p presents fake rtx binary operations to the the 
>>> back-end
>>> +   in order to determine its capabilities.  In order to avoid creating 
>>> fake
>>> +   operations on each call, values from previous calls are cached in a 
>>> global
>>> +   cached_binops hash_table.  It contains rtxes, which can be looked 
>>> up using
>>> +   binop_keys.  */
>>> +
>>> +struct binop_key {
>>> +  enum rtx_code code;/* Operation code.  */
>>> +  machine_mode value_mode;   /* Result mode. */
>>> +  machine_mode cmp_op_mode;  /* Operand mode.*/
>>> +};
>>> +
>>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
>>> +  typedef rtx value_type;
>>> +  typedef binop_key compare_type;
>>> +
>>> +  static hashval_t
>>> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
>>> cmp_op_mode)
>>> +  {
>>> +inchash::hash hstate (0);
>>> +hstate.add_int (code);
>>> +hstate.add_int (value_mode);
>>> +hstate.add_int (cmp_op_mode);
>>> +return hstate.end ();
>>> +  }
>>> +
>>> +  static hashval_t
>>> +  hash (const rtx )
>>> +  {
>>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 
>>> 0)));
>>> +  }
>>> +
>>> +  static bool
>>> +  equal (const rtx , const binop_key )
>>> +  {
>>> +return (GET_CODE (ref1) == ref2.code)
>>> +&& (GET_MODE (ref1) == ref2.value_mode)
>>> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
>>> +  }
>>> +};
>>> +
>>> +static GTY ((cache)) hash_table *cached_binops;
>>> +
>>> +static rtx
>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
>>> +   machine_mode cmp_op_mode)
>>> +{
>>> +  if (!cached_binops)
>>> +cached_binops = hash_table::create_ggc (1024);
>>> +  binop_key key = { code, value_mode, cmp_op_mode };
>>> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
>>> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
>>> +  if (!*slot)
>>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx 
>>> (cmp_op_mode),
>>> + gen_reg_rtx (cmp_op_mode));
>>> +  return *slot;
>>> +}
>> 
>> Sorry, I didn't mean anything this complicated.  I just meant that
>> we should have a single cached rtx that we can change via PUT_CODE and
>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
>> time.
>> 
>> Something like:
>> 
>> static GTY ((cache)) rtx cached_binop;
>> 
>> rtx
>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
>> {
>> if (cached_binop)
>>  {
>>PUT_CODE (cached_binop, code);
>>PUT_MODE_RAW (cached_binop, mode);
>>PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>>PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>>  }
>> else
>>  {
>>rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>>rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>>cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>>  }
>> return cached_binop;
>> }
> 
> Hmm, maybe we need  auto_rtx (code) that constructs such
> RTX on the stack instead of wasting a GC root (and causing
> issues for future threading of GCC ;)).
 
 Do you mean something like this?
 
 union {
 char raw[rtx_code_size[code]];
 rtx rtx;
 } binop;
 
 Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
 anything useful), or should I implement this?
>>> 
>>> It doesn't exist AFAIK, I thought about using alloca like
>>> 
>>> rtx tem;
>>> rtx_alloca (tem, PLUS);
>>> 
>>> and due to using alloca rtx_alloca has to be a macro like
>>> 
>>> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
>>> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code);
>>> 
>>> maybe C++ can help making this prettier but of course
>>> since we use alloca we have to avoid opening new scopes.
>>> 
>>> I guess templates like with auto_vec doesn't work unless
>>> we can make RTX_CODE_SIZE constant-evaluated.
>>> 
>>> Richard.
>> 
>> I ended up with the following change:
>> 
>> diff --git 

Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-26 Thread Richard Biener
On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich  wrote:
>
> > Am 26.08.2019 um 10:49 schrieb Richard Biener :
> >
> > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich  wrote:
> >>
> >>> Am 23.08.2019 um 13:24 schrieb Richard Biener 
> >>> :
> >>>
> >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
> >>>  wrote:
> 
>  Ilya Leoshkevich  writes:
> > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode 
> > mode,
> >  return 0;
> > }
> >
> > +/* can_vector_compare_p presents fake rtx binary operations to the the 
> > back-end
> > +   in order to determine its capabilities.  In order to avoid creating 
> > fake
> > +   operations on each call, values from previous calls are cached in a 
> > global
> > +   cached_binops hash_table.  It contains rtxes, which can be looked 
> > up using
> > +   binop_keys.  */
> > +
> > +struct binop_key {
> > +  enum rtx_code code;/* Operation code.  */
> > +  machine_mode value_mode;   /* Result mode. */
> > +  machine_mode cmp_op_mode;  /* Operand mode.*/
> > +};
> > +
> > +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
> > +  typedef rtx value_type;
> > +  typedef binop_key compare_type;
> > +
> > +  static hashval_t
> > +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> > cmp_op_mode)
> > +  {
> > +inchash::hash hstate (0);
> > +hstate.add_int (code);
> > +hstate.add_int (value_mode);
> > +hstate.add_int (cmp_op_mode);
> > +return hstate.end ();
> > +  }
> > +
> > +  static hashval_t
> > +  hash (const rtx )
> > +  {
> > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 
> > 0)));
> > +  }
> > +
> > +  static bool
> > +  equal (const rtx , const binop_key )
> > +  {
> > +return (GET_CODE (ref1) == ref2.code)
> > +&& (GET_MODE (ref1) == ref2.value_mode)
> > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> > +  }
> > +};
> > +
> > +static GTY ((cache)) hash_table *cached_binops;
> > +
> > +static rtx
> > +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> > +   machine_mode cmp_op_mode)
> > +{
> > +  if (!cached_binops)
> > +cached_binops = hash_table::create_ggc (1024);
> > +  binop_key key = { code, value_mode, cmp_op_mode };
> > +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
> > +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
> > +  if (!*slot)
> > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx 
> > (cmp_op_mode),
> > + gen_reg_rtx (cmp_op_mode));
> > +  return *slot;
> > +}
> 
>  Sorry, I didn't mean anything this complicated.  I just meant that
>  we should have a single cached rtx that we can change via PUT_CODE and
>  PUT_MODE_RAW for each new query, rather than allocating a new rtx each
>  time.
> 
>  Something like:
> 
>  static GTY ((cache)) rtx cached_binop;
> 
>  rtx
>  get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
>  {
>  if (cached_binop)
>    {
>  PUT_CODE (cached_binop, code);
>  PUT_MODE_RAW (cached_binop, mode);
>  PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>  PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>    }
>  else
>    {
>  rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>  rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>  cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>    }
>  return cached_binop;
>  }
> >>>
> >>> Hmm, maybe we need  auto_rtx (code) that constructs such
> >>> RTX on the stack instead of wasting a GC root (and causing
> >>> issues for future threading of GCC ;)).
> >>
> >> Do you mean something like this?
> >>
> >> union {
> >>  char raw[rtx_code_size[code]];
> >>  rtx rtx;
> >> } binop;
> >>
> >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
> >> anything useful), or should I implement this?
> >
> > It doesn't exist AFAIK, I thought about using alloca like
> >
> > rtx tem;
> > rtx_alloca (tem, PLUS);
> >
> > and due to using alloca rtx_alloca has to be a macro like
> >
> > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
> > memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code);
> >
> > maybe C++ can help making this prettier but of course
> > since we use alloca we have to avoid opening new scopes.
> >
> > I guess templates like with auto_vec doesn't work unless
> > we can make RTX_CODE_SIZE constant-evaluated.
> >
> > Richard.
>
> I ended up with the following change:
>
> diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
> index a667cdab94e..97aa2144e95 100644
> --- 

Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-26 Thread Ilya Leoshkevich
> Am 26.08.2019 um 10:49 schrieb Richard Biener :
> 
> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich  wrote:
>> 
>>> Am 23.08.2019 um 13:24 schrieb Richard Biener :
>>> 
>>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
>>>  wrote:
 
 Ilya Leoshkevich  writes:
> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode 
> mode,
>  return 0;
> }
> 
> +/* can_vector_compare_p presents fake rtx binary operations to the the 
> back-end
> +   in order to determine its capabilities.  In order to avoid creating 
> fake
> +   operations on each call, values from previous calls are cached in a 
> global
> +   cached_binops hash_table.  It contains rtxes, which can be looked up 
> using
> +   binop_keys.  */
> +
> +struct binop_key {
> +  enum rtx_code code;/* Operation code.  */
> +  machine_mode value_mode;   /* Result mode. */
> +  machine_mode cmp_op_mode;  /* Operand mode.*/
> +};
> +
> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
> +  typedef rtx value_type;
> +  typedef binop_key compare_type;
> +
> +  static hashval_t
> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> cmp_op_mode)
> +  {
> +inchash::hash hstate (0);
> +hstate.add_int (code);
> +hstate.add_int (value_mode);
> +hstate.add_int (cmp_op_mode);
> +return hstate.end ();
> +  }
> +
> +  static hashval_t
> +  hash (const rtx )
> +  {
> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 
> 0)));
> +  }
> +
> +  static bool
> +  equal (const rtx , const binop_key )
> +  {
> +return (GET_CODE (ref1) == ref2.code)
> +&& (GET_MODE (ref1) == ref2.value_mode)
> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> +  }
> +};
> +
> +static GTY ((cache)) hash_table *cached_binops;
> +
> +static rtx
> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> +   machine_mode cmp_op_mode)
> +{
> +  if (!cached_binops)
> +cached_binops = hash_table::create_ggc (1024);
> +  binop_key key = { code, value_mode, cmp_op_mode };
> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
> +  if (!*slot)
> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
> + gen_reg_rtx (cmp_op_mode));
> +  return *slot;
> +}
 
 Sorry, I didn't mean anything this complicated.  I just meant that
 we should have a single cached rtx that we can change via PUT_CODE and
 PUT_MODE_RAW for each new query, rather than allocating a new rtx each
 time.
 
 Something like:
 
 static GTY ((cache)) rtx cached_binop;
 
 rtx
 get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
 {
 if (cached_binop)
   {
 PUT_CODE (cached_binop, code);
 PUT_MODE_RAW (cached_binop, mode);
 PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
 PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
   }
 else
   {
 rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
 rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
 cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
   }
 return cached_binop;
 }
>>> 
>>> Hmm, maybe we need  auto_rtx (code) that constructs such
>>> RTX on the stack instead of wasting a GC root (and causing
>>> issues for future threading of GCC ;)).
>> 
>> Do you mean something like this?
>> 
>> union {
>>  char raw[rtx_code_size[code]];
>>  rtx rtx;
>> } binop;
>> 
>> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
>> anything useful), or should I implement this?
> 
> It doesn't exist AFAIK, I thought about using alloca like
> 
> rtx tem;
> rtx_alloca (tem, PLUS);
> 
> and due to using alloca rtx_alloca has to be a macro like
> 
> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code);
> 
> maybe C++ can help making this prettier but of course
> since we use alloca we have to avoid opening new scopes.
> 
> I guess templates like with auto_vec doesn't work unless
> we can make RTX_CODE_SIZE constant-evaluated.
> 
> Richard.

I ended up with the following change:

diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index a667cdab94e..97aa2144e95 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned 
int regno)
   set_regno_raw (x, regno, nregs);
 }
 
-/* Generate a new REG rtx.  Make sure ORIGINAL_REGNO is set properly, and
+/* Initialize a REG rtx.  Make sure ORIGINAL_REGNO is set properly, and
don't attempt to share 

Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-26 Thread Richard Biener
On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich  wrote:
>
> > Am 23.08.2019 um 13:24 schrieb Richard Biener :
> >
> > On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
> >  wrote:
> >>
> >> Ilya Leoshkevich  writes:
> >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode 
> >>> mode,
> >>>   return 0;
> >>> }
> >>>
> >>> +/* can_vector_compare_p presents fake rtx binary operations to the the 
> >>> back-end
> >>> +   in order to determine its capabilities.  In order to avoid creating 
> >>> fake
> >>> +   operations on each call, values from previous calls are cached in a 
> >>> global
> >>> +   cached_binops hash_table.  It contains rtxes, which can be looked up 
> >>> using
> >>> +   binop_keys.  */
> >>> +
> >>> +struct binop_key {
> >>> +  enum rtx_code code;/* Operation code.  */
> >>> +  machine_mode value_mode;   /* Result mode. */
> >>> +  machine_mode cmp_op_mode;  /* Operand mode.*/
> >>> +};
> >>> +
> >>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
> >>> +  typedef rtx value_type;
> >>> +  typedef binop_key compare_type;
> >>> +
> >>> +  static hashval_t
> >>> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> >>> cmp_op_mode)
> >>> +  {
> >>> +inchash::hash hstate (0);
> >>> +hstate.add_int (code);
> >>> +hstate.add_int (value_mode);
> >>> +hstate.add_int (cmp_op_mode);
> >>> +return hstate.end ();
> >>> +  }
> >>> +
> >>> +  static hashval_t
> >>> +  hash (const rtx )
> >>> +  {
> >>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 
> >>> 0)));
> >>> +  }
> >>> +
> >>> +  static bool
> >>> +  equal (const rtx , const binop_key )
> >>> +  {
> >>> +return (GET_CODE (ref1) == ref2.code)
> >>> +&& (GET_MODE (ref1) == ref2.value_mode)
> >>> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> >>> +  }
> >>> +};
> >>> +
> >>> +static GTY ((cache)) hash_table *cached_binops;
> >>> +
> >>> +static rtx
> >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> >>> +   machine_mode cmp_op_mode)
> >>> +{
> >>> +  if (!cached_binops)
> >>> +cached_binops = hash_table::create_ggc (1024);
> >>> +  binop_key key = { code, value_mode, cmp_op_mode };
> >>> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
> >>> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
> >>> +  if (!*slot)
> >>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
> >>> + gen_reg_rtx (cmp_op_mode));
> >>> +  return *slot;
> >>> +}
> >>
> >> Sorry, I didn't mean anything this complicated.  I just meant that
> >> we should have a single cached rtx that we can change via PUT_CODE and
> >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
> >> time.
> >>
> >> Something like:
> >>
> >> static GTY ((cache)) rtx cached_binop;
> >>
> >> rtx
> >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
> >> {
> >>  if (cached_binop)
> >>{
> >>  PUT_CODE (cached_binop, code);
> >>  PUT_MODE_RAW (cached_binop, mode);
> >>  PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
> >>  PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
> >>}
> >>  else
> >>{
> >>  rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
> >>  rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
> >>  cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
> >>}
> >>  return cached_binop;
> >> }
> >
> > Hmm, maybe we need  auto_rtx (code) that constructs such
> > RTX on the stack instead of wasting a GC root (and causing
> > issues for future threading of GCC ;)).
>
> Do you mean something like this?
>
> union {
>   char raw[rtx_code_size[code]];
>   rtx rtx;
> } binop;
>
> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
> anything useful), or should I implement this?

It doesn't exist AFAIK, I thought about using alloca like

 rtx tem;
 rtx_alloca (tem, PLUS);

and due to using alloca rtx_alloca has to be a macro like

#define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code);

maybe C++ can help making this prettier but of course
since we use alloca we have to avoid opening new scopes.

I guess templates like with auto_vec doesn't work unless
we can make RTX_CODE_SIZE constant-evaluated.

Richard.


Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-23 Thread Richard Sandiford
Ilya Leoshkevich  writes:
>> Am 23.08.2019 um 12:43 schrieb Richard Sandiford :
>> 
>> Ilya Leoshkevich  writes:
>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
>>>   return 0;
>>> }
>>> 
>>> +/* can_vector_compare_p presents fake rtx binary operations to the the 
>>> back-end
>>> +   in order to determine its capabilities.  In order to avoid creating fake
>>> +   operations on each call, values from previous calls are cached in a 
>>> global
>>> +   cached_binops hash_table.  It contains rtxes, which can be looked up 
>>> using
>>> +   binop_keys.  */
>>> +
>>> +struct binop_key {
>>> +  enum rtx_code code;/* Operation code.  */
>>> +  machine_mode value_mode;   /* Result mode. */
>>> +  machine_mode cmp_op_mode;  /* Operand mode.*/
>>> +};
>>> +
>>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
>>> +  typedef rtx value_type;
>>> +  typedef binop_key compare_type;
>>> +
>>> +  static hashval_t
>>> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
>>> cmp_op_mode)
>>> +  {
>>> +inchash::hash hstate (0);
>>> +hstate.add_int (code);
>>> +hstate.add_int (value_mode);
>>> +hstate.add_int (cmp_op_mode);
>>> +return hstate.end ();
>>> +  }
>>> +
>>> +  static hashval_t
>>> +  hash (const rtx )
>>> +  {
>>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0)));
>>> +  }
>>> +
>>> +  static bool
>>> +  equal (const rtx , const binop_key )
>>> +  {
>>> +return (GET_CODE (ref1) == ref2.code)
>>> +  && (GET_MODE (ref1) == ref2.value_mode)
>>> +  && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
>>> +  }
>>> +};
>>> +
>>> +static GTY ((cache)) hash_table *cached_binops;
>>> +
>>> +static rtx
>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
>>> + machine_mode cmp_op_mode)
>>> +{
>>> +  if (!cached_binops)
>>> +cached_binops = hash_table::create_ggc (1024);
>>> +  binop_key key = { code, value_mode, cmp_op_mode };
>>> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
>>> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
>>> +  if (!*slot)
>>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
>>> +   gen_reg_rtx (cmp_op_mode));
>>> +  return *slot;
>>> +}
>> 
>> Sorry, I didn't mean anything this complicated.  I just meant that
>> we should have a single cached rtx that we can change via PUT_CODE and
>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
>> time.
>> 
>> Something like:
>> 
>> static GTY ((cache)) rtx cached_binop;
>> 
>> rtx
>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
>> {
>>  if (cached_binop)
>>{
>>  PUT_CODE (cached_binop, code);
>>  PUT_MODE_RAW (cached_binop, mode);
>>  PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>>  PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>>}
>>  else
>>{
>>  rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>>  rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>>  cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>>}
>>  return cached_binop;
>> }
>> 
>> Thanks,
>> Richard
>
> Oh, I must have completely missed the point: the cache is only for
> storage, and stored values themselves don't really matter.
>
> To make rtx usable with GTY ((cache)) I had to do:
>
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -4427,4 +4427,9 @@ extern void gt_ggc_mx (rtx &);
>  extern void gt_pch_nx (rtx &);
>  extern void gt_pch_nx (rtx &, gt_pointer_operator, void *);
>
> +inline void
> +gt_cleare_cache (rtx)
> +{
> +}
> +
>  #endif /* ! GCC_RTL_H */
>
> Does that look ok?

Ah, turns out I was thinking of "deletable" rather than "cache", sorry.

> Another think that might turn out being important: in your first
> suggestion you use
>
>  if (insn_operand_predicate_fn pred = insn_data[icode].operand[3].predicate)
>{
>  machine_mode cmp_mode = insn_data[icode].operand[3].mode;
>
> instead of simply insn_operand_matches - is there any difference?

I guess it was premature optimisation: if the .md file doesn't specify a
predicate, we don't even need to create the rtx.  But since most targets
probably do specify a predicate, using insn_matches is fine too.

Thanks,
Richard


Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-23 Thread Ilya Leoshkevich
> Am 23.08.2019 um 13:24 schrieb Richard Biener :
> 
> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
>  wrote:
>> 
>> Ilya Leoshkevich  writes:
>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
>>>   return 0;
>>> }
>>> 
>>> +/* can_vector_compare_p presents fake rtx binary operations to the the 
>>> back-end
>>> +   in order to determine its capabilities.  In order to avoid creating fake
>>> +   operations on each call, values from previous calls are cached in a 
>>> global
>>> +   cached_binops hash_table.  It contains rtxes, which can be looked up 
>>> using
>>> +   binop_keys.  */
>>> +
>>> +struct binop_key {
>>> +  enum rtx_code code;/* Operation code.  */
>>> +  machine_mode value_mode;   /* Result mode. */
>>> +  machine_mode cmp_op_mode;  /* Operand mode.*/
>>> +};
>>> +
>>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
>>> +  typedef rtx value_type;
>>> +  typedef binop_key compare_type;
>>> +
>>> +  static hashval_t
>>> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
>>> cmp_op_mode)
>>> +  {
>>> +inchash::hash hstate (0);
>>> +hstate.add_int (code);
>>> +hstate.add_int (value_mode);
>>> +hstate.add_int (cmp_op_mode);
>>> +return hstate.end ();
>>> +  }
>>> +
>>> +  static hashval_t
>>> +  hash (const rtx )
>>> +  {
>>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0)));
>>> +  }
>>> +
>>> +  static bool
>>> +  equal (const rtx , const binop_key )
>>> +  {
>>> +return (GET_CODE (ref1) == ref2.code)
>>> +&& (GET_MODE (ref1) == ref2.value_mode)
>>> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
>>> +  }
>>> +};
>>> +
>>> +static GTY ((cache)) hash_table *cached_binops;
>>> +
>>> +static rtx
>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
>>> +   machine_mode cmp_op_mode)
>>> +{
>>> +  if (!cached_binops)
>>> +cached_binops = hash_table::create_ggc (1024);
>>> +  binop_key key = { code, value_mode, cmp_op_mode };
>>> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
>>> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
>>> +  if (!*slot)
>>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
>>> + gen_reg_rtx (cmp_op_mode));
>>> +  return *slot;
>>> +}
>> 
>> Sorry, I didn't mean anything this complicated.  I just meant that
>> we should have a single cached rtx that we can change via PUT_CODE and
>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
>> time.
>> 
>> Something like:
>> 
>> static GTY ((cache)) rtx cached_binop;
>> 
>> rtx
>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
>> {
>>  if (cached_binop)
>>{
>>  PUT_CODE (cached_binop, code);
>>  PUT_MODE_RAW (cached_binop, mode);
>>  PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>>  PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>>}
>>  else
>>{
>>  rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>>  rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>>  cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>>}
>>  return cached_binop;
>> }
> 
> Hmm, maybe we need  auto_rtx (code) that constructs such
> RTX on the stack instead of wasting a GC root (and causing
> issues for future threading of GCC ;)).

Do you mean something like this?

union {
  char raw[rtx_code_size[code]];
  rtx rtx;
} binop;

Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
anything useful), or should I implement this?


Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-23 Thread Ilya Leoshkevich
> Am 23.08.2019 um 12:43 schrieb Richard Sandiford :
> 
> Ilya Leoshkevich  writes:
>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
>>   return 0;
>> }
>> 
>> +/* can_vector_compare_p presents fake rtx binary operations to the the 
>> back-end
>> +   in order to determine its capabilities.  In order to avoid creating fake
>> +   operations on each call, values from previous calls are cached in a 
>> global
>> +   cached_binops hash_table.  It contains rtxes, which can be looked up 
>> using
>> +   binop_keys.  */
>> +
>> +struct binop_key {
>> +  enum rtx_code code;/* Operation code.  */
>> +  machine_mode value_mode;   /* Result mode. */
>> +  machine_mode cmp_op_mode;  /* Operand mode.*/
>> +};
>> +
>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
>> +  typedef rtx value_type;
>> +  typedef binop_key compare_type;
>> +
>> +  static hashval_t
>> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
>> cmp_op_mode)
>> +  {
>> +inchash::hash hstate (0);
>> +hstate.add_int (code);
>> +hstate.add_int (value_mode);
>> +hstate.add_int (cmp_op_mode);
>> +return hstate.end ();
>> +  }
>> +
>> +  static hashval_t
>> +  hash (const rtx )
>> +  {
>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0)));
>> +  }
>> +
>> +  static bool
>> +  equal (const rtx , const binop_key )
>> +  {
>> +return (GET_CODE (ref1) == ref2.code)
>> +   && (GET_MODE (ref1) == ref2.value_mode)
>> +   && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
>> +  }
>> +};
>> +
>> +static GTY ((cache)) hash_table *cached_binops;
>> +
>> +static rtx
>> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
>> +  machine_mode cmp_op_mode)
>> +{
>> +  if (!cached_binops)
>> +cached_binops = hash_table::create_ggc (1024);
>> +  binop_key key = { code, value_mode, cmp_op_mode };
>> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
>> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
>> +  if (!*slot)
>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
>> +gen_reg_rtx (cmp_op_mode));
>> +  return *slot;
>> +}
> 
> Sorry, I didn't mean anything this complicated.  I just meant that
> we should have a single cached rtx that we can change via PUT_CODE and
> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
> time.
> 
> Something like:
> 
> static GTY ((cache)) rtx cached_binop;
> 
> rtx
> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
> {
>  if (cached_binop)
>{
>  PUT_CODE (cached_binop, code);
>  PUT_MODE_RAW (cached_binop, mode);
>  PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>  PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>}
>  else
>{
>  rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>  rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>  cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>}
>  return cached_binop;
> }
> 
> Thanks,
> Richard

Oh, I must have completely missed the point: the cache is only for
storage, and stored values themselves don't really matter.

To make rtx usable with GTY ((cache)) I had to do:

--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -4427,4 +4427,9 @@ extern void gt_ggc_mx (rtx &);
 extern void gt_pch_nx (rtx &);
 extern void gt_pch_nx (rtx &, gt_pointer_operator, void *);

+inline void
+gt_cleare_cache (rtx)
+{
+}
+
 #endif /* ! GCC_RTL_H */

Does that look ok?

Another think that might turn out being important: in your first
suggestion you use

 if (insn_operand_predicate_fn pred = insn_data[icode].operand[3].predicate)
   {
 machine_mode cmp_mode = insn_data[icode].operand[3].mode;

instead of simply insn_operand_matches - is there any difference?


Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-23 Thread Richard Biener
On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
 wrote:
>
> Ilya Leoshkevich  writes:
> > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
> >return 0;
> >  }
> >
> > +/* can_vector_compare_p presents fake rtx binary operations to the the 
> > back-end
> > +   in order to determine its capabilities.  In order to avoid creating fake
> > +   operations on each call, values from previous calls are cached in a 
> > global
> > +   cached_binops hash_table.  It contains rtxes, which can be looked up 
> > using
> > +   binop_keys.  */
> > +
> > +struct binop_key {
> > +  enum rtx_code code;/* Operation code.  */
> > +  machine_mode value_mode;   /* Result mode. */
> > +  machine_mode cmp_op_mode;  /* Operand mode.*/
> > +};
> > +
> > +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
> > +  typedef rtx value_type;
> > +  typedef binop_key compare_type;
> > +
> > +  static hashval_t
> > +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> > cmp_op_mode)
> > +  {
> > +inchash::hash hstate (0);
> > +hstate.add_int (code);
> > +hstate.add_int (value_mode);
> > +hstate.add_int (cmp_op_mode);
> > +return hstate.end ();
> > +  }
> > +
> > +  static hashval_t
> > +  hash (const rtx )
> > +  {
> > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0)));
> > +  }
> > +
> > +  static bool
> > +  equal (const rtx , const binop_key )
> > +  {
> > +return (GET_CODE (ref1) == ref2.code)
> > +&& (GET_MODE (ref1) == ref2.value_mode)
> > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> > +  }
> > +};
> > +
> > +static GTY ((cache)) hash_table *cached_binops;
> > +
> > +static rtx
> > +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> > +   machine_mode cmp_op_mode)
> > +{
> > +  if (!cached_binops)
> > +cached_binops = hash_table::create_ggc (1024);
> > +  binop_key key = { code, value_mode, cmp_op_mode };
> > +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
> > +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
> > +  if (!*slot)
> > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
> > + gen_reg_rtx (cmp_op_mode));
> > +  return *slot;
> > +}
>
> Sorry, I didn't mean anything this complicated.  I just meant that
> we should have a single cached rtx that we can change via PUT_CODE and
> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
> time.
>
> Something like:
>
> static GTY ((cache)) rtx cached_binop;
>
> rtx
> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
> {
>   if (cached_binop)
> {
>   PUT_CODE (cached_binop, code);
>   PUT_MODE_RAW (cached_binop, mode);
>   PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>   PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
> }
>   else
> {
>   rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>   rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>   cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
> }
>   return cached_binop;
> }

Hmm, maybe we need  auto_rtx (code) that constructs such
RTX on the stack instead of wasting a GC root (and causing
issues for future threading of GCC ;)).

Richard.

>
> Thanks,
> Richard


Re: [PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-23 Thread Richard Sandiford
Ilya Leoshkevich  writes:
> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
>return 0;
>  }
>  
> +/* can_vector_compare_p presents fake rtx binary operations to the the 
> back-end
> +   in order to determine its capabilities.  In order to avoid creating fake
> +   operations on each call, values from previous calls are cached in a global
> +   cached_binops hash_table.  It contains rtxes, which can be looked up using
> +   binop_keys.  */
> +
> +struct binop_key {
> +  enum rtx_code code;/* Operation code.  */
> +  machine_mode value_mode;   /* Result mode. */
> +  machine_mode cmp_op_mode;  /* Operand mode.*/
> +};
> +
> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
> +  typedef rtx value_type;
> +  typedef binop_key compare_type;
> +
> +  static hashval_t
> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> cmp_op_mode)
> +  {
> +inchash::hash hstate (0);
> +hstate.add_int (code);
> +hstate.add_int (value_mode);
> +hstate.add_int (cmp_op_mode);
> +return hstate.end ();
> +  }
> +
> +  static hashval_t
> +  hash (const rtx )
> +  {
> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0)));
> +  }
> +
> +  static bool
> +  equal (const rtx , const binop_key )
> +  {
> +return (GET_CODE (ref1) == ref2.code)
> +&& (GET_MODE (ref1) == ref2.value_mode)
> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> +  }
> +};
> +
> +static GTY ((cache)) hash_table *cached_binops;
> +
> +static rtx
> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> +   machine_mode cmp_op_mode)
> +{
> +  if (!cached_binops)
> +cached_binops = hash_table::create_ggc (1024);
> +  binop_key key = { code, value_mode, cmp_op_mode };
> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
> +  if (!*slot)
> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
> + gen_reg_rtx (cmp_op_mode));
> +  return *slot;
> +}

Sorry, I didn't mean anything this complicated.  I just meant that
we should have a single cached rtx that we can change via PUT_CODE and
PUT_MODE_RAW for each new query, rather than allocating a new rtx each
time.

Something like:

static GTY ((cache)) rtx cached_binop;

rtx
get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
{
  if (cached_binop)
{
  PUT_CODE (cached_binop, code);
  PUT_MODE_RAW (cached_binop, mode);
  PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
  PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
}
  else
{
  rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
  cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
}
  return cached_binop;
}

Thanks,
Richard


[PATCH v2 3/9] Introduce can_vector_compare_p function

2019-08-22 Thread Ilya Leoshkevich
z13 supports only non-signaling vector comparisons.  This means we
cannot vectorize LT, LE, GT, GE and LTGT when compiling for z13.
However, we cannot express this restriction today: the code only checks
whether vcond$a$b optab exists, which does not contain information about
the operation.

Introduce a function that checks whether back-end supports vector
comparisons with individual rtx codes by matching vcond expander's third
argument with a fake comparison with the corresponding rtx code.

gcc/ChangeLog:

2019-08-21  Ilya Leoshkevich  

* Makefile.in (GTFILES): Add optabs.c.
* optabs-tree.c (expand_vec_cond_expr_p): Use
can_vector_compare_p.
* optabs.c (binop_key): Binary operation cache key.
(binop_hasher): Binary operation cache hasher.
(cached_binops): Binary operation cache.
(get_cached_binop): New function that returns a cached binary
operation or creates a new one.
(can_vector_compare_p): New function.
* optabs.h (enum can_vector_compare_purpose): New enum. Not
really needed today, but can be used to extend the support to
e.g. vec_cmp if need arises.
(can_vector_compare_p): New function.
---
 gcc/Makefile.in   |  2 +-
 gcc/optabs-tree.c | 11 +--
 gcc/optabs.c  | 79 +++
 gcc/optabs.h  | 15 +
 4 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 597dc01328b..d2207da5657 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2541,7 +2541,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h 
$(srcdir)/coretypes.h \
   $(srcdir)/function.c $(srcdir)/except.c \
   $(srcdir)/ggc-tests.c \
   $(srcdir)/gcse.c $(srcdir)/godump.c \
-  $(srcdir)/lists.c $(srcdir)/optabs-libfuncs.c \
+  $(srcdir)/lists.c $(srcdir)/optabs.c $(srcdir)/optabs-libfuncs.c \
   $(srcdir)/profile.c $(srcdir)/mcf.c \
   $(srcdir)/reg-stack.c $(srcdir)/cfgrtl.c \
   $(srcdir)/stor-layout.c \
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 8157798cc71..e68bb39c021 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -23,7 +23,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "target.h"
 #include "insn-codes.h"
+#include "rtl.h"
 #include "tree.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "optabs-tree.h"
 #include "stor-layout.h"
 
@@ -347,8 +350,12 @@ expand_vec_cond_expr_p (tree value_type, tree cmp_op_type, 
enum tree_code code)
   || maybe_ne (GET_MODE_NUNITS (value_mode), GET_MODE_NUNITS 
(cmp_op_mode)))
 return false;
 
-  if (get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
-  TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing
+  bool unsigned_p = TYPE_UNSIGNED (cmp_op_type);
+  if (((get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
+unsigned_p) == CODE_FOR_nothing)
+   || !can_vector_compare_p (get_rtx_code (code, unsigned_p),
+TYPE_MODE (value_type),
+TYPE_MODE (cmp_op_type), cvcp_vcond))
   && ((code != EQ_EXPR && code != NE_EXPR)
  || get_vcond_eq_icode (TYPE_MODE (value_type),
 TYPE_MODE (cmp_op_type)) == CODE_FOR_nothing))
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 9e54dda6e7f..07b4d824822 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "recog.h"
 #include "diagnostic-core.h"
 #include "rtx-vector-builder.h"
+#include "hash-table.h"
 
 /* Include insn-config.h before expr.h so that HAVE_conditional_move
is properly defined.  */
@@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
   return 0;
 }
 
+/* can_vector_compare_p presents fake rtx binary operations to the the back-end
+   in order to determine its capabilities.  In order to avoid creating fake
+   operations on each call, values from previous calls are cached in a global
+   cached_binops hash_table.  It contains rtxes, which can be looked up using
+   binop_keys.  */
+
+struct binop_key {
+  enum rtx_code code;/* Operation code.  */
+  machine_mode value_mode;   /* Result mode. */
+  machine_mode cmp_op_mode;  /* Operand mode.*/
+};
+
+struct binop_hasher : pointer_hash_mark, ggc_cache_remove {
+  typedef rtx value_type;
+  typedef binop_key compare_type;
+
+  static hashval_t
+  hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode)
+  {
+inchash::hash hstate (0);
+hstate.add_int (code);
+hstate.add_int (value_mode);
+hstate.add_int (cmp_op_mode);
+return hstate.end ();
+  }
+
+  static hashval_t
+  hash (const rtx )
+  {
+return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0)));
+  }
+
+  static bool
+  equal (const rtx , const binop_key )
+  {
+return (GET_CODE (ref1) == ref2.code)
+  && (GET_MODE