Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
On Tue, Aug 27, 2019 at 9:01 AM Richard Sandiford wrote: > > Ilya Leoshkevich writes: > >> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich : > >> > >>> Am 26.08.2019 um 15:06 schrieb Richard Biener > >>> : > >>> > >>> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich > >>> wrote: > > > Am 26.08.2019 um 10:49 schrieb Richard Biener > > : > > > > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich > > wrote: > >> > >>> Am 23.08.2019 um 13:24 schrieb Richard Biener > >>> : > >>> > >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > >>> wrote: > > Ilya Leoshkevich writes: > > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, > > machine_mode mode, > > return 0; > > } > > > > +/* can_vector_compare_p presents fake rtx binary operations to the > > the back-end > > + in order to determine its capabilities. In order to avoid > > creating fake > > + operations on each call, values from previous calls are cached > > in a global > > + cached_binops hash_table. It contains rtxes, which can be > > looked up using > > + binop_keys. */ > > + > > +struct binop_key { > > + enum rtx_code code;/* Operation code. */ > > + machine_mode value_mode; /* Result mode. */ > > + machine_mode cmp_op_mode; /* Operand mode.*/ > > +}; > > + > > +struct binop_hasher : pointer_hash_mark, > > ggc_cache_remove { > > + typedef rtx value_type; > > + typedef binop_key compare_type; > > + > > + static hashval_t > > + hash (enum rtx_code code, machine_mode value_mode, machine_mode > > cmp_op_mode) > > + { > > +inchash::hash hstate (0); > > +hstate.add_int (code); > > +hstate.add_int (value_mode); > > +hstate.add_int (cmp_op_mode); > > +return hstate.end (); > > + } > > + > > + static hashval_t > > + hash (const rtx ) > > + { > > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP > > (ref, 0))); > > + } > > + > > + static bool > > + equal (const rtx , const binop_key ) > > + { > > +return (GET_CODE (ref1) == ref2.code) > > +&& (GET_MODE (ref1) == ref2.value_mode) > > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > > + } > > +}; > > + > > +static GTY ((cache)) hash_table *cached_binops; > > + > > +static rtx > > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > > + machine_mode cmp_op_mode) > > +{ > > + if (!cached_binops) > > +cached_binops = hash_table::create_ggc (1024); > > + binop_key key = { code, value_mode, cmp_op_mode }; > > + hashval_t hash = binop_hasher::hash (code, value_mode, > > cmp_op_mode); > > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, > > INSERT); > > + if (!*slot) > > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx > > (cmp_op_mode), > > + gen_reg_rtx (cmp_op_mode)); > > + return *slot; > > +} > > Sorry, I didn't mean anything this complicated. I just meant that > we should have a single cached rtx that we can change via PUT_CODE > and > PUT_MODE_RAW for each new query, rather than allocating a new rtx > each > time. > > Something like: > > static GTY ((cache)) rtx cached_binop; > > rtx > get_cached_binop (machine_mode mode, rtx_code code, machine_mode > op_mode) > { > if (cached_binop) > { > PUT_CODE (cached_binop, code); > PUT_MODE_RAW (cached_binop, mode); > PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > } > else > { > rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > } > return cached_binop; > } > >>> > >>> Hmm, maybe we need auto_rtx (code) that constructs such > >>> RTX on the stack instead of wasting a GC root (and causing > >>> issues for future threading of GCC ;)). > >> > >> Do you mean something like this? > >> > >> union { > >> char raw[rtx_code_size[code]]; > >> rtx rtx; > >> } binop; > >> > >> Does this exist already (git grep
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
Ilya Leoshkevich writes: >> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich : >> >>> Am 26.08.2019 um 15:06 schrieb Richard Biener : >>> >>> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich wrote: > Am 26.08.2019 um 10:49 schrieb Richard Biener > : > > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich > wrote: >> >>> Am 23.08.2019 um 13:24 schrieb Richard Biener >>> : >>> >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford >>> wrote: Ilya Leoshkevich writes: > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, > machine_mode mode, > return 0; > } > > +/* can_vector_compare_p presents fake rtx binary operations to the > the back-end > + in order to determine its capabilities. In order to avoid > creating fake > + operations on each call, values from previous calls are cached in > a global > + cached_binops hash_table. It contains rtxes, which can be looked > up using > + binop_keys. */ > + > +struct binop_key { > + enum rtx_code code;/* Operation code. */ > + machine_mode value_mode; /* Result mode. */ > + machine_mode cmp_op_mode; /* Operand mode.*/ > +}; > + > +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { > + typedef rtx value_type; > + typedef binop_key compare_type; > + > + static hashval_t > + hash (enum rtx_code code, machine_mode value_mode, machine_mode > cmp_op_mode) > + { > +inchash::hash hstate (0); > +hstate.add_int (code); > +hstate.add_int (value_mode); > +hstate.add_int (cmp_op_mode); > +return hstate.end (); > + } > + > + static hashval_t > + hash (const rtx ) > + { > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP > (ref, 0))); > + } > + > + static bool > + equal (const rtx , const binop_key ) > + { > +return (GET_CODE (ref1) == ref2.code) > +&& (GET_MODE (ref1) == ref2.value_mode) > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > + } > +}; > + > +static GTY ((cache)) hash_table *cached_binops; > + > +static rtx > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > + machine_mode cmp_op_mode) > +{ > + if (!cached_binops) > +cached_binops = hash_table::create_ggc (1024); > + binop_key key = { code, value_mode, cmp_op_mode }; > + hashval_t hash = binop_hasher::hash (code, value_mode, > cmp_op_mode); > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > + if (!*slot) > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx > (cmp_op_mode), > + gen_reg_rtx (cmp_op_mode)); > + return *slot; > +} Sorry, I didn't mean anything this complicated. I just meant that we should have a single cached rtx that we can change via PUT_CODE and PUT_MODE_RAW for each new query, rather than allocating a new rtx each time. Something like: static GTY ((cache)) rtx cached_binop; rtx get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) { if (cached_binop) { PUT_CODE (cached_binop, code); PUT_MODE_RAW (cached_binop, mode); PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); } else { rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); } return cached_binop; } >>> >>> Hmm, maybe we need auto_rtx (code) that constructs such >>> RTX on the stack instead of wasting a GC root (and causing >>> issues for future threading of GCC ;)). >> >> Do you mean something like this? >> >> union { >> char raw[rtx_code_size[code]]; >> rtx rtx; >> } binop; >> >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show >> anything useful), or should I implement this? > > It doesn't exist AFAIK, I thought about using alloca like > > rtx tem; > rtx_alloca (tem, PLUS); > > and due to using alloca rtx_alloca has to be a macro like > > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); >
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich : > >> Am 26.08.2019 um 15:06 schrieb Richard Biener : >> >> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich wrote: >>> Am 26.08.2019 um 10:49 schrieb Richard Biener : On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich wrote: > >> Am 23.08.2019 um 13:24 schrieb Richard Biener >> : >> >> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford >> wrote: >>> >>> Ilya Leoshkevich writes: @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, return 0; } +/* can_vector_compare_p presents fake rtx binary operations to the the back-end + in order to determine its capabilities. In order to avoid creating fake + operations on each call, values from previous calls are cached in a global + cached_binops hash_table. It contains rtxes, which can be looked up using + binop_keys. */ + +struct binop_key { + enum rtx_code code;/* Operation code. */ + machine_mode value_mode; /* Result mode. */ + machine_mode cmp_op_mode; /* Operand mode.*/ +}; + +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { + typedef rtx value_type; + typedef binop_key compare_type; + + static hashval_t + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) + { +inchash::hash hstate (0); +hstate.add_int (code); +hstate.add_int (value_mode); +hstate.add_int (cmp_op_mode); +return hstate.end (); + } + + static hashval_t + hash (const rtx ) + { +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); + } + + static bool + equal (const rtx , const binop_key ) + { +return (GET_CODE (ref1) == ref2.code) +&& (GET_MODE (ref1) == ref2.value_mode) +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); + } +}; + +static GTY ((cache)) hash_table *cached_binops; + +static rtx +get_cached_binop (enum rtx_code code, machine_mode value_mode, + machine_mode cmp_op_mode) +{ + if (!cached_binops) +cached_binops = hash_table::create_ggc (1024); + binop_key key = { code, value_mode, cmp_op_mode }; + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); + if (!*slot) +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), + gen_reg_rtx (cmp_op_mode)); + return *slot; +} >>> >>> Sorry, I didn't mean anything this complicated. I just meant that >>> we should have a single cached rtx that we can change via PUT_CODE and >>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >>> time. >>> >>> Something like: >>> >>> static GTY ((cache)) rtx cached_binop; >>> >>> rtx >>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode >>> op_mode) >>> { >>> if (cached_binop) >>> { >>> PUT_CODE (cached_binop, code); >>> PUT_MODE_RAW (cached_binop, mode); >>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >>> } >>> else >>> { >>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >>> } >>> return cached_binop; >>> } >> >> Hmm, maybe we need auto_rtx (code) that constructs such >> RTX on the stack instead of wasting a GC root (and causing >> issues for future threading of GCC ;)). > > Do you mean something like this? > > union { > char raw[rtx_code_size[code]]; > rtx rtx; > } binop; > > Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show > anything useful), or should I implement this? It doesn't exist AFAIK, I thought about using alloca like rtx tem; rtx_alloca (tem, PLUS); and due to using alloca rtx_alloca has to be a macro like #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); maybe C++ can help making this prettier but of course since we use alloca we have to avoid opening new scopes.
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
> Am 26.08.2019 um 15:06 schrieb Richard Biener : > > On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich wrote: >> >>> Am 26.08.2019 um 10:49 schrieb Richard Biener : >>> >>> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich wrote: > Am 23.08.2019 um 13:24 schrieb Richard Biener > : > > On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > wrote: >> >> Ilya Leoshkevich writes: >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode >>> mode, >>> return 0; >>> } >>> >>> +/* can_vector_compare_p presents fake rtx binary operations to the the >>> back-end >>> + in order to determine its capabilities. In order to avoid creating >>> fake >>> + operations on each call, values from previous calls are cached in a >>> global >>> + cached_binops hash_table. It contains rtxes, which can be looked >>> up using >>> + binop_keys. */ >>> + >>> +struct binop_key { >>> + enum rtx_code code;/* Operation code. */ >>> + machine_mode value_mode; /* Result mode. */ >>> + machine_mode cmp_op_mode; /* Operand mode.*/ >>> +}; >>> + >>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { >>> + typedef rtx value_type; >>> + typedef binop_key compare_type; >>> + >>> + static hashval_t >>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode >>> cmp_op_mode) >>> + { >>> +inchash::hash hstate (0); >>> +hstate.add_int (code); >>> +hstate.add_int (value_mode); >>> +hstate.add_int (cmp_op_mode); >>> +return hstate.end (); >>> + } >>> + >>> + static hashval_t >>> + hash (const rtx ) >>> + { >>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, >>> 0))); >>> + } >>> + >>> + static bool >>> + equal (const rtx , const binop_key ) >>> + { >>> +return (GET_CODE (ref1) == ref2.code) >>> +&& (GET_MODE (ref1) == ref2.value_mode) >>> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>> + } >>> +}; >>> + >>> +static GTY ((cache)) hash_table *cached_binops; >>> + >>> +static rtx >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>> + machine_mode cmp_op_mode) >>> +{ >>> + if (!cached_binops) >>> +cached_binops = hash_table::create_ggc (1024); >>> + binop_key key = { code, value_mode, cmp_op_mode }; >>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>> + if (!*slot) >>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx >>> (cmp_op_mode), >>> + gen_reg_rtx (cmp_op_mode)); >>> + return *slot; >>> +} >> >> Sorry, I didn't mean anything this complicated. I just meant that >> we should have a single cached rtx that we can change via PUT_CODE and >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >> time. >> >> Something like: >> >> static GTY ((cache)) rtx cached_binop; >> >> rtx >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >> { >> if (cached_binop) >> { >>PUT_CODE (cached_binop, code); >>PUT_MODE_RAW (cached_binop, mode); >>PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >>PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >> } >> else >> { >>rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >>rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >>cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >> } >> return cached_binop; >> } > > Hmm, maybe we need auto_rtx (code) that constructs such > RTX on the stack instead of wasting a GC root (and causing > issues for future threading of GCC ;)). Do you mean something like this? union { char raw[rtx_code_size[code]]; rtx rtx; } binop; Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show anything useful), or should I implement this? >>> >>> It doesn't exist AFAIK, I thought about using alloca like >>> >>> rtx tem; >>> rtx_alloca (tem, PLUS); >>> >>> and due to using alloca rtx_alloca has to be a macro like >>> >>> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); >>> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); >>> >>> maybe C++ can help making this prettier but of course >>> since we use alloca we have to avoid opening new scopes. >>> >>> I guess templates like with auto_vec doesn't work unless >>> we can make RTX_CODE_SIZE constant-evaluated. >>> >>> Richard. >> >> I ended up with the following change: >> >> diff --git
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich wrote: > > > Am 26.08.2019 um 10:49 schrieb Richard Biener : > > > > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich wrote: > >> > >>> Am 23.08.2019 um 13:24 schrieb Richard Biener > >>> : > >>> > >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > >>> wrote: > > Ilya Leoshkevich writes: > > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode > > mode, > > return 0; > > } > > > > +/* can_vector_compare_p presents fake rtx binary operations to the the > > back-end > > + in order to determine its capabilities. In order to avoid creating > > fake > > + operations on each call, values from previous calls are cached in a > > global > > + cached_binops hash_table. It contains rtxes, which can be looked > > up using > > + binop_keys. */ > > + > > +struct binop_key { > > + enum rtx_code code;/* Operation code. */ > > + machine_mode value_mode; /* Result mode. */ > > + machine_mode cmp_op_mode; /* Operand mode.*/ > > +}; > > + > > +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { > > + typedef rtx value_type; > > + typedef binop_key compare_type; > > + > > + static hashval_t > > + hash (enum rtx_code code, machine_mode value_mode, machine_mode > > cmp_op_mode) > > + { > > +inchash::hash hstate (0); > > +hstate.add_int (code); > > +hstate.add_int (value_mode); > > +hstate.add_int (cmp_op_mode); > > +return hstate.end (); > > + } > > + > > + static hashval_t > > + hash (const rtx ) > > + { > > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, > > 0))); > > + } > > + > > + static bool > > + equal (const rtx , const binop_key ) > > + { > > +return (GET_CODE (ref1) == ref2.code) > > +&& (GET_MODE (ref1) == ref2.value_mode) > > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > > + } > > +}; > > + > > +static GTY ((cache)) hash_table *cached_binops; > > + > > +static rtx > > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > > + machine_mode cmp_op_mode) > > +{ > > + if (!cached_binops) > > +cached_binops = hash_table::create_ggc (1024); > > + binop_key key = { code, value_mode, cmp_op_mode }; > > + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > > + if (!*slot) > > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx > > (cmp_op_mode), > > + gen_reg_rtx (cmp_op_mode)); > > + return *slot; > > +} > > Sorry, I didn't mean anything this complicated. I just meant that > we should have a single cached rtx that we can change via PUT_CODE and > PUT_MODE_RAW for each new query, rather than allocating a new rtx each > time. > > Something like: > > static GTY ((cache)) rtx cached_binop; > > rtx > get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > { > if (cached_binop) > { > PUT_CODE (cached_binop, code); > PUT_MODE_RAW (cached_binop, mode); > PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > } > else > { > rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > } > return cached_binop; > } > >>> > >>> Hmm, maybe we need auto_rtx (code) that constructs such > >>> RTX on the stack instead of wasting a GC root (and causing > >>> issues for future threading of GCC ;)). > >> > >> Do you mean something like this? > >> > >> union { > >> char raw[rtx_code_size[code]]; > >> rtx rtx; > >> } binop; > >> > >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show > >> anything useful), or should I implement this? > > > > It doesn't exist AFAIK, I thought about using alloca like > > > > rtx tem; > > rtx_alloca (tem, PLUS); > > > > and due to using alloca rtx_alloca has to be a macro like > > > > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); > > memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); > > > > maybe C++ can help making this prettier but of course > > since we use alloca we have to avoid opening new scopes. > > > > I guess templates like with auto_vec doesn't work unless > > we can make RTX_CODE_SIZE constant-evaluated. > > > > Richard. > > I ended up with the following change: > > diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c > index a667cdab94e..97aa2144e95 100644 > ---
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
> Am 26.08.2019 um 10:49 schrieb Richard Biener : > > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich wrote: >> >>> Am 23.08.2019 um 13:24 schrieb Richard Biener : >>> >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford >>> wrote: Ilya Leoshkevich writes: > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode > mode, > return 0; > } > > +/* can_vector_compare_p presents fake rtx binary operations to the the > back-end > + in order to determine its capabilities. In order to avoid creating > fake > + operations on each call, values from previous calls are cached in a > global > + cached_binops hash_table. It contains rtxes, which can be looked up > using > + binop_keys. */ > + > +struct binop_key { > + enum rtx_code code;/* Operation code. */ > + machine_mode value_mode; /* Result mode. */ > + machine_mode cmp_op_mode; /* Operand mode.*/ > +}; > + > +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { > + typedef rtx value_type; > + typedef binop_key compare_type; > + > + static hashval_t > + hash (enum rtx_code code, machine_mode value_mode, machine_mode > cmp_op_mode) > + { > +inchash::hash hstate (0); > +hstate.add_int (code); > +hstate.add_int (value_mode); > +hstate.add_int (cmp_op_mode); > +return hstate.end (); > + } > + > + static hashval_t > + hash (const rtx ) > + { > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, > 0))); > + } > + > + static bool > + equal (const rtx , const binop_key ) > + { > +return (GET_CODE (ref1) == ref2.code) > +&& (GET_MODE (ref1) == ref2.value_mode) > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > + } > +}; > + > +static GTY ((cache)) hash_table *cached_binops; > + > +static rtx > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > + machine_mode cmp_op_mode) > +{ > + if (!cached_binops) > +cached_binops = hash_table::create_ggc (1024); > + binop_key key = { code, value_mode, cmp_op_mode }; > + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > + if (!*slot) > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > + gen_reg_rtx (cmp_op_mode)); > + return *slot; > +} Sorry, I didn't mean anything this complicated. I just meant that we should have a single cached rtx that we can change via PUT_CODE and PUT_MODE_RAW for each new query, rather than allocating a new rtx each time. Something like: static GTY ((cache)) rtx cached_binop; rtx get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) { if (cached_binop) { PUT_CODE (cached_binop, code); PUT_MODE_RAW (cached_binop, mode); PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); } else { rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); } return cached_binop; } >>> >>> Hmm, maybe we need auto_rtx (code) that constructs such >>> RTX on the stack instead of wasting a GC root (and causing >>> issues for future threading of GCC ;)). >> >> Do you mean something like this? >> >> union { >> char raw[rtx_code_size[code]]; >> rtx rtx; >> } binop; >> >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show >> anything useful), or should I implement this? > > It doesn't exist AFAIK, I thought about using alloca like > > rtx tem; > rtx_alloca (tem, PLUS); > > and due to using alloca rtx_alloca has to be a macro like > > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); > memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); > > maybe C++ can help making this prettier but of course > since we use alloca we have to avoid opening new scopes. > > I guess templates like with auto_vec doesn't work unless > we can make RTX_CODE_SIZE constant-evaluated. > > Richard. I ended up with the following change: diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index a667cdab94e..97aa2144e95 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno) set_regno_raw (x, regno, nregs); } -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and don't attempt to share
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich wrote: > > > Am 23.08.2019 um 13:24 schrieb Richard Biener : > > > > On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > > wrote: > >> > >> Ilya Leoshkevich writes: > >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode > >>> mode, > >>> return 0; > >>> } > >>> > >>> +/* can_vector_compare_p presents fake rtx binary operations to the the > >>> back-end > >>> + in order to determine its capabilities. In order to avoid creating > >>> fake > >>> + operations on each call, values from previous calls are cached in a > >>> global > >>> + cached_binops hash_table. It contains rtxes, which can be looked up > >>> using > >>> + binop_keys. */ > >>> + > >>> +struct binop_key { > >>> + enum rtx_code code;/* Operation code. */ > >>> + machine_mode value_mode; /* Result mode. */ > >>> + machine_mode cmp_op_mode; /* Operand mode.*/ > >>> +}; > >>> + > >>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { > >>> + typedef rtx value_type; > >>> + typedef binop_key compare_type; > >>> + > >>> + static hashval_t > >>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode > >>> cmp_op_mode) > >>> + { > >>> +inchash::hash hstate (0); > >>> +hstate.add_int (code); > >>> +hstate.add_int (value_mode); > >>> +hstate.add_int (cmp_op_mode); > >>> +return hstate.end (); > >>> + } > >>> + > >>> + static hashval_t > >>> + hash (const rtx ) > >>> + { > >>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, > >>> 0))); > >>> + } > >>> + > >>> + static bool > >>> + equal (const rtx , const binop_key ) > >>> + { > >>> +return (GET_CODE (ref1) == ref2.code) > >>> +&& (GET_MODE (ref1) == ref2.value_mode) > >>> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > >>> + } > >>> +}; > >>> + > >>> +static GTY ((cache)) hash_table *cached_binops; > >>> + > >>> +static rtx > >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, > >>> + machine_mode cmp_op_mode) > >>> +{ > >>> + if (!cached_binops) > >>> +cached_binops = hash_table::create_ggc (1024); > >>> + binop_key key = { code, value_mode, cmp_op_mode }; > >>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > >>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > >>> + if (!*slot) > >>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > >>> + gen_reg_rtx (cmp_op_mode)); > >>> + return *slot; > >>> +} > >> > >> Sorry, I didn't mean anything this complicated. I just meant that > >> we should have a single cached rtx that we can change via PUT_CODE and > >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each > >> time. > >> > >> Something like: > >> > >> static GTY ((cache)) rtx cached_binop; > >> > >> rtx > >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > >> { > >> if (cached_binop) > >>{ > >> PUT_CODE (cached_binop, code); > >> PUT_MODE_RAW (cached_binop, mode); > >> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > >> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > >>} > >> else > >>{ > >> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > >> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > >> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > >>} > >> return cached_binop; > >> } > > > > Hmm, maybe we need auto_rtx (code) that constructs such > > RTX on the stack instead of wasting a GC root (and causing > > issues for future threading of GCC ;)). > > Do you mean something like this? > > union { > char raw[rtx_code_size[code]]; > rtx rtx; > } binop; > > Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show > anything useful), or should I implement this? It doesn't exist AFAIK, I thought about using alloca like rtx tem; rtx_alloca (tem, PLUS); and due to using alloca rtx_alloca has to be a macro like #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); maybe C++ can help making this prettier but of course since we use alloca we have to avoid opening new scopes. I guess templates like with auto_vec doesn't work unless we can make RTX_CODE_SIZE constant-evaluated. Richard.
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
Ilya Leoshkevich writes: >> Am 23.08.2019 um 12:43 schrieb Richard Sandiford : >> >> Ilya Leoshkevich writes: >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>> return 0; >>> } >>> >>> +/* can_vector_compare_p presents fake rtx binary operations to the the >>> back-end >>> + in order to determine its capabilities. In order to avoid creating fake >>> + operations on each call, values from previous calls are cached in a >>> global >>> + cached_binops hash_table. It contains rtxes, which can be looked up >>> using >>> + binop_keys. */ >>> + >>> +struct binop_key { >>> + enum rtx_code code;/* Operation code. */ >>> + machine_mode value_mode; /* Result mode. */ >>> + machine_mode cmp_op_mode; /* Operand mode.*/ >>> +}; >>> + >>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { >>> + typedef rtx value_type; >>> + typedef binop_key compare_type; >>> + >>> + static hashval_t >>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode >>> cmp_op_mode) >>> + { >>> +inchash::hash hstate (0); >>> +hstate.add_int (code); >>> +hstate.add_int (value_mode); >>> +hstate.add_int (cmp_op_mode); >>> +return hstate.end (); >>> + } >>> + >>> + static hashval_t >>> + hash (const rtx ) >>> + { >>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>> + } >>> + >>> + static bool >>> + equal (const rtx , const binop_key ) >>> + { >>> +return (GET_CODE (ref1) == ref2.code) >>> + && (GET_MODE (ref1) == ref2.value_mode) >>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>> + } >>> +}; >>> + >>> +static GTY ((cache)) hash_table *cached_binops; >>> + >>> +static rtx >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>> + machine_mode cmp_op_mode) >>> +{ >>> + if (!cached_binops) >>> +cached_binops = hash_table::create_ggc (1024); >>> + binop_key key = { code, value_mode, cmp_op_mode }; >>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>> + if (!*slot) >>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>> + gen_reg_rtx (cmp_op_mode)); >>> + return *slot; >>> +} >> >> Sorry, I didn't mean anything this complicated. I just meant that >> we should have a single cached rtx that we can change via PUT_CODE and >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >> time. >> >> Something like: >> >> static GTY ((cache)) rtx cached_binop; >> >> rtx >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >> { >> if (cached_binop) >>{ >> PUT_CODE (cached_binop, code); >> PUT_MODE_RAW (cached_binop, mode); >> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >>} >> else >>{ >> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >>} >> return cached_binop; >> } >> >> Thanks, >> Richard > > Oh, I must have completely missed the point: the cache is only for > storage, and stored values themselves don't really matter. > > To make rtx usable with GTY ((cache)) I had to do: > > --- a/gcc/rtl.h > +++ b/gcc/rtl.h > @@ -4427,4 +4427,9 @@ extern void gt_ggc_mx (rtx &); > extern void gt_pch_nx (rtx &); > extern void gt_pch_nx (rtx &, gt_pointer_operator, void *); > > +inline void > +gt_cleare_cache (rtx) > +{ > +} > + > #endif /* ! GCC_RTL_H */ > > Does that look ok? Ah, turns out I was thinking of "deletable" rather than "cache", sorry. > Another think that might turn out being important: in your first > suggestion you use > > if (insn_operand_predicate_fn pred = insn_data[icode].operand[3].predicate) >{ > machine_mode cmp_mode = insn_data[icode].operand[3].mode; > > instead of simply insn_operand_matches - is there any difference? I guess it was premature optimisation: if the .md file doesn't specify a predicate, we don't even need to create the rtx. But since most targets probably do specify a predicate, using insn_matches is fine too. Thanks, Richard
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
> Am 23.08.2019 um 13:24 schrieb Richard Biener : > > On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > wrote: >> >> Ilya Leoshkevich writes: >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>> return 0; >>> } >>> >>> +/* can_vector_compare_p presents fake rtx binary operations to the the >>> back-end >>> + in order to determine its capabilities. In order to avoid creating fake >>> + operations on each call, values from previous calls are cached in a >>> global >>> + cached_binops hash_table. It contains rtxes, which can be looked up >>> using >>> + binop_keys. */ >>> + >>> +struct binop_key { >>> + enum rtx_code code;/* Operation code. */ >>> + machine_mode value_mode; /* Result mode. */ >>> + machine_mode cmp_op_mode; /* Operand mode.*/ >>> +}; >>> + >>> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { >>> + typedef rtx value_type; >>> + typedef binop_key compare_type; >>> + >>> + static hashval_t >>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode >>> cmp_op_mode) >>> + { >>> +inchash::hash hstate (0); >>> +hstate.add_int (code); >>> +hstate.add_int (value_mode); >>> +hstate.add_int (cmp_op_mode); >>> +return hstate.end (); >>> + } >>> + >>> + static hashval_t >>> + hash (const rtx ) >>> + { >>> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>> + } >>> + >>> + static bool >>> + equal (const rtx , const binop_key ) >>> + { >>> +return (GET_CODE (ref1) == ref2.code) >>> +&& (GET_MODE (ref1) == ref2.value_mode) >>> +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>> + } >>> +}; >>> + >>> +static GTY ((cache)) hash_table *cached_binops; >>> + >>> +static rtx >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>> + machine_mode cmp_op_mode) >>> +{ >>> + if (!cached_binops) >>> +cached_binops = hash_table::create_ggc (1024); >>> + binop_key key = { code, value_mode, cmp_op_mode }; >>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>> + if (!*slot) >>> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>> + gen_reg_rtx (cmp_op_mode)); >>> + return *slot; >>> +} >> >> Sorry, I didn't mean anything this complicated. I just meant that >> we should have a single cached rtx that we can change via PUT_CODE and >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >> time. >> >> Something like: >> >> static GTY ((cache)) rtx cached_binop; >> >> rtx >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >> { >> if (cached_binop) >>{ >> PUT_CODE (cached_binop, code); >> PUT_MODE_RAW (cached_binop, mode); >> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >>} >> else >>{ >> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >>} >> return cached_binop; >> } > > Hmm, maybe we need auto_rtx (code) that constructs such > RTX on the stack instead of wasting a GC root (and causing > issues for future threading of GCC ;)). Do you mean something like this? union { char raw[rtx_code_size[code]]; rtx rtx; } binop; Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show anything useful), or should I implement this?
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
> Am 23.08.2019 um 12:43 schrieb Richard Sandiford : > > Ilya Leoshkevich writes: >> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >> return 0; >> } >> >> +/* can_vector_compare_p presents fake rtx binary operations to the the >> back-end >> + in order to determine its capabilities. In order to avoid creating fake >> + operations on each call, values from previous calls are cached in a >> global >> + cached_binops hash_table. It contains rtxes, which can be looked up >> using >> + binop_keys. */ >> + >> +struct binop_key { >> + enum rtx_code code;/* Operation code. */ >> + machine_mode value_mode; /* Result mode. */ >> + machine_mode cmp_op_mode; /* Operand mode.*/ >> +}; >> + >> +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { >> + typedef rtx value_type; >> + typedef binop_key compare_type; >> + >> + static hashval_t >> + hash (enum rtx_code code, machine_mode value_mode, machine_mode >> cmp_op_mode) >> + { >> +inchash::hash hstate (0); >> +hstate.add_int (code); >> +hstate.add_int (value_mode); >> +hstate.add_int (cmp_op_mode); >> +return hstate.end (); >> + } >> + >> + static hashval_t >> + hash (const rtx ) >> + { >> +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >> + } >> + >> + static bool >> + equal (const rtx , const binop_key ) >> + { >> +return (GET_CODE (ref1) == ref2.code) >> + && (GET_MODE (ref1) == ref2.value_mode) >> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >> + } >> +}; >> + >> +static GTY ((cache)) hash_table *cached_binops; >> + >> +static rtx >> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >> + machine_mode cmp_op_mode) >> +{ >> + if (!cached_binops) >> +cached_binops = hash_table::create_ggc (1024); >> + binop_key key = { code, value_mode, cmp_op_mode }; >> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >> + if (!*slot) >> +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >> +gen_reg_rtx (cmp_op_mode)); >> + return *slot; >> +} > > Sorry, I didn't mean anything this complicated. I just meant that > we should have a single cached rtx that we can change via PUT_CODE and > PUT_MODE_RAW for each new query, rather than allocating a new rtx each > time. > > Something like: > > static GTY ((cache)) rtx cached_binop; > > rtx > get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > { > if (cached_binop) >{ > PUT_CODE (cached_binop, code); > PUT_MODE_RAW (cached_binop, mode); > PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >} > else >{ > rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >} > return cached_binop; > } > > Thanks, > Richard Oh, I must have completely missed the point: the cache is only for storage, and stored values themselves don't really matter. To make rtx usable with GTY ((cache)) I had to do: --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -4427,4 +4427,9 @@ extern void gt_ggc_mx (rtx &); extern void gt_pch_nx (rtx &); extern void gt_pch_nx (rtx &, gt_pointer_operator, void *); +inline void +gt_cleare_cache (rtx) +{ +} + #endif /* ! GCC_RTL_H */ Does that look ok? Another think that might turn out being important: in your first suggestion you use if (insn_operand_predicate_fn pred = insn_data[icode].operand[3].predicate) { machine_mode cmp_mode = insn_data[icode].operand[3].mode; instead of simply insn_operand_matches - is there any difference?
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford wrote: > > Ilya Leoshkevich writes: > > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, > >return 0; > > } > > > > +/* can_vector_compare_p presents fake rtx binary operations to the the > > back-end > > + in order to determine its capabilities. In order to avoid creating fake > > + operations on each call, values from previous calls are cached in a > > global > > + cached_binops hash_table. It contains rtxes, which can be looked up > > using > > + binop_keys. */ > > + > > +struct binop_key { > > + enum rtx_code code;/* Operation code. */ > > + machine_mode value_mode; /* Result mode. */ > > + machine_mode cmp_op_mode; /* Operand mode.*/ > > +}; > > + > > +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { > > + typedef rtx value_type; > > + typedef binop_key compare_type; > > + > > + static hashval_t > > + hash (enum rtx_code code, machine_mode value_mode, machine_mode > > cmp_op_mode) > > + { > > +inchash::hash hstate (0); > > +hstate.add_int (code); > > +hstate.add_int (value_mode); > > +hstate.add_int (cmp_op_mode); > > +return hstate.end (); > > + } > > + > > + static hashval_t > > + hash (const rtx ) > > + { > > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); > > + } > > + > > + static bool > > + equal (const rtx , const binop_key ) > > + { > > +return (GET_CODE (ref1) == ref2.code) > > +&& (GET_MODE (ref1) == ref2.value_mode) > > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > > + } > > +}; > > + > > +static GTY ((cache)) hash_table *cached_binops; > > + > > +static rtx > > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > > + machine_mode cmp_op_mode) > > +{ > > + if (!cached_binops) > > +cached_binops = hash_table::create_ggc (1024); > > + binop_key key = { code, value_mode, cmp_op_mode }; > > + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > > + if (!*slot) > > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > > + gen_reg_rtx (cmp_op_mode)); > > + return *slot; > > +} > > Sorry, I didn't mean anything this complicated. I just meant that > we should have a single cached rtx that we can change via PUT_CODE and > PUT_MODE_RAW for each new query, rather than allocating a new rtx each > time. > > Something like: > > static GTY ((cache)) rtx cached_binop; > > rtx > get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > { > if (cached_binop) > { > PUT_CODE (cached_binop, code); > PUT_MODE_RAW (cached_binop, mode); > PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > } > else > { > rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > } > return cached_binop; > } Hmm, maybe we need auto_rtx (code) that constructs such RTX on the stack instead of wasting a GC root (and causing issues for future threading of GCC ;)). Richard. > > Thanks, > Richard
Re: [PATCH v2 3/9] Introduce can_vector_compare_p function
Ilya Leoshkevich writes: > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >return 0; > } > > +/* can_vector_compare_p presents fake rtx binary operations to the the > back-end > + in order to determine its capabilities. In order to avoid creating fake > + operations on each call, values from previous calls are cached in a global > + cached_binops hash_table. It contains rtxes, which can be looked up using > + binop_keys. */ > + > +struct binop_key { > + enum rtx_code code;/* Operation code. */ > + machine_mode value_mode; /* Result mode. */ > + machine_mode cmp_op_mode; /* Operand mode.*/ > +}; > + > +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { > + typedef rtx value_type; > + typedef binop_key compare_type; > + > + static hashval_t > + hash (enum rtx_code code, machine_mode value_mode, machine_mode > cmp_op_mode) > + { > +inchash::hash hstate (0); > +hstate.add_int (code); > +hstate.add_int (value_mode); > +hstate.add_int (cmp_op_mode); > +return hstate.end (); > + } > + > + static hashval_t > + hash (const rtx ) > + { > +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); > + } > + > + static bool > + equal (const rtx , const binop_key ) > + { > +return (GET_CODE (ref1) == ref2.code) > +&& (GET_MODE (ref1) == ref2.value_mode) > +&& (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > + } > +}; > + > +static GTY ((cache)) hash_table *cached_binops; > + > +static rtx > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > + machine_mode cmp_op_mode) > +{ > + if (!cached_binops) > +cached_binops = hash_table::create_ggc (1024); > + binop_key key = { code, value_mode, cmp_op_mode }; > + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > + if (!*slot) > +*slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > + gen_reg_rtx (cmp_op_mode)); > + return *slot; > +} Sorry, I didn't mean anything this complicated. I just meant that we should have a single cached rtx that we can change via PUT_CODE and PUT_MODE_RAW for each new query, rather than allocating a new rtx each time. Something like: static GTY ((cache)) rtx cached_binop; rtx get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) { if (cached_binop) { PUT_CODE (cached_binop, code); PUT_MODE_RAW (cached_binop, mode); PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); } else { rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); } return cached_binop; } Thanks, Richard
[PATCH v2 3/9] Introduce can_vector_compare_p function
z13 supports only non-signaling vector comparisons. This means we cannot vectorize LT, LE, GT, GE and LTGT when compiling for z13. However, we cannot express this restriction today: the code only checks whether vcond$a$b optab exists, which does not contain information about the operation. Introduce a function that checks whether back-end supports vector comparisons with individual rtx codes by matching vcond expander's third argument with a fake comparison with the corresponding rtx code. gcc/ChangeLog: 2019-08-21 Ilya Leoshkevich * Makefile.in (GTFILES): Add optabs.c. * optabs-tree.c (expand_vec_cond_expr_p): Use can_vector_compare_p. * optabs.c (binop_key): Binary operation cache key. (binop_hasher): Binary operation cache hasher. (cached_binops): Binary operation cache. (get_cached_binop): New function that returns a cached binary operation or creates a new one. (can_vector_compare_p): New function. * optabs.h (enum can_vector_compare_purpose): New enum. Not really needed today, but can be used to extend the support to e.g. vec_cmp if need arises. (can_vector_compare_p): New function. --- gcc/Makefile.in | 2 +- gcc/optabs-tree.c | 11 +-- gcc/optabs.c | 79 +++ gcc/optabs.h | 15 + 4 files changed, 104 insertions(+), 3 deletions(-) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 597dc01328b..d2207da5657 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2541,7 +2541,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/function.c $(srcdir)/except.c \ $(srcdir)/ggc-tests.c \ $(srcdir)/gcse.c $(srcdir)/godump.c \ - $(srcdir)/lists.c $(srcdir)/optabs-libfuncs.c \ + $(srcdir)/lists.c $(srcdir)/optabs.c $(srcdir)/optabs-libfuncs.c \ $(srcdir)/profile.c $(srcdir)/mcf.c \ $(srcdir)/reg-stack.c $(srcdir)/cfgrtl.c \ $(srcdir)/stor-layout.c \ diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index 8157798cc71..e68bb39c021 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -23,7 +23,10 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "target.h" #include "insn-codes.h" +#include "rtl.h" #include "tree.h" +#include "memmodel.h" +#include "optabs.h" #include "optabs-tree.h" #include "stor-layout.h" @@ -347,8 +350,12 @@ expand_vec_cond_expr_p (tree value_type, tree cmp_op_type, enum tree_code code) || maybe_ne (GET_MODE_NUNITS (value_mode), GET_MODE_NUNITS (cmp_op_mode))) return false; - if (get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), - TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing + bool unsigned_p = TYPE_UNSIGNED (cmp_op_type); + if (((get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), +unsigned_p) == CODE_FOR_nothing) + || !can_vector_compare_p (get_rtx_code (code, unsigned_p), +TYPE_MODE (value_type), +TYPE_MODE (cmp_op_type), cvcp_vcond)) && ((code != EQ_EXPR && code != NE_EXPR) || get_vcond_eq_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type)) == CODE_FOR_nothing)) diff --git a/gcc/optabs.c b/gcc/optabs.c index 9e54dda6e7f..07b4d824822 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "recog.h" #include "diagnostic-core.h" #include "rtx-vector-builder.h" +#include "hash-table.h" /* Include insn-config.h before expr.h so that HAVE_conditional_move is properly defined. */ @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, return 0; } +/* can_vector_compare_p presents fake rtx binary operations to the the back-end + in order to determine its capabilities. In order to avoid creating fake + operations on each call, values from previous calls are cached in a global + cached_binops hash_table. It contains rtxes, which can be looked up using + binop_keys. */ + +struct binop_key { + enum rtx_code code;/* Operation code. */ + machine_mode value_mode; /* Result mode. */ + machine_mode cmp_op_mode; /* Operand mode.*/ +}; + +struct binop_hasher : pointer_hash_mark, ggc_cache_remove { + typedef rtx value_type; + typedef binop_key compare_type; + + static hashval_t + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) + { +inchash::hash hstate (0); +hstate.add_int (code); +hstate.add_int (value_mode); +hstate.add_int (cmp_op_mode); +return hstate.end (); + } + + static hashval_t + hash (const rtx ) + { +return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); + } + + static bool + equal (const rtx , const binop_key ) + { +return (GET_CODE (ref1) == ref2.code) + && (GET_MODE