[committed] [PR other/104044] Remove extraneous semicolons
As noted in BZ104044 we've got a few places where we have extraneous semicolons. Committed as obvious to the trunk, Jeff commit 6d82e0fea5f988e829912aaa70a9964a81ad4e5e Author: Jeff Law Date: Sat Nov 19 19:21:37 2022 -0700 [PR other/104044] Remove extraneous semicolons gcc/ PR other/104044 * config/mn10300/mn10300.cc (mn10300_print_operand): Remove extraneous semicolon. * config/nvptx/nvptx.cc (nvptx_goacc_reduction_fini): Likewise. gcc/jit/ PR other/104044 * jit-playback.cc (playback::lvale::mark_addressable): Remove extraeous semicolon diff --git a/gcc/config/mn10300/mn10300.cc b/gcc/config/mn10300/mn10300.cc index 2a58dd9256d..f887d9e2808 100644 --- a/gcc/config/mn10300/mn10300.cc +++ b/gcc/config/mn10300/mn10300.cc @@ -285,12 +285,12 @@ mn10300_print_operand (FILE *file, rtx x, int code) REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), val); fprintf (file, "0x%lx", val[0]); - break;; + break; case E_SFmode: REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val[0]); fprintf (file, "0x%lx", val[0]); - break;; + break; case E_VOIDmode: case E_DImode: mn10300_print_operand_address (file, @@ -344,7 +344,7 @@ mn10300_print_operand (FILE *file, rtx x, int code) REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), val); fprintf (file, "0x%lx", val[1]); - break;; + break; case E_SFmode: gcc_unreachable (); case E_VOIDmode: diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 2fe120b3873..ed3807bee43 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -7043,7 +7043,7 @@ nvptx_goacc_reduction_fini (gcall *call, offload_attrs *oa) enum tree_code op = (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 4)); gimple_seq seq = NULL; - tree r = NULL_TREE;; + tree r = NULL_TREE; push_gimplify_context (true); diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc index bf006903a44..069ed705609 100644 --- a/gcc/jit/jit-playback.cc +++ b/gcc/jit/jit-playback.cc @@ -1647,7 +1647,7 @@ bool 
playback::lvalue:: mark_addressable (location *loc) { - tree x = as_tree ();; + tree x = as_tree (); while (1) switch (TREE_CODE (x))
[committed] Fix test to not depend on DECL_UIDs
The tester started tripping this on s390-linux-gnu: Tests that now fail, but worked before (19 tests): gcc.dg/pr96542.c scan-tree-dump-times evrp "254" 2 The problem is we search for "254" in the dump file. The dump file contains UIDs for function declarations. So changes in the number of predefined DECL nodes can make the test pass or fail depending on whether or not a decl with a UID containing "254" shows up. Like this: ;; Function foo (foo, funcdef_no=0, decl_uid=2542, cgraph_uid=1, symbol_order=0) ISTM the test wants to look for a "return 254" rather than just "254". I added a change for that to the tester. Naturally that fixed the test on s390 and the dozen or so targets I tested didn't show any regressions. Installing on the trunk, Jeff commit 53a6b2e0d3405c2a4de28a3e065837d5d55f4336 Author: Jeff Law Date: Sat Nov 19 20:47:20 2022 -0500 Fix test to not depend on DECL_UIDs The tester started tripping this on s390-linux-gnu: Tests that now fail, but worked before (19 tests): gcc.dg/pr96542.c scan-tree-dump-times evrp "254" 2 The problem is we search for "254" in the dump file. The dump file contains UIDs for function declarations. So changes in the number of predefined DECL nodes can make the test pass or fail depending on whether or not a decl with a UID containing "254" shows up. Like this: ;; Function foo (foo, funcdef_no=0, decl_uid=2542, cgraph_uid=1, symbol_order=0) ISTM the test wants to look for a "return 254" rather than just "254". I added a change for that to the tester. Naturally that fixed the test on s390 and the dozen or so targets I tested didn't show any regressions. gcc/testsuite * gcc.dg/pr96542.c: Avoid falsely matching DECL_UIDs with the number 254 in them. 
diff --git a/gcc/testsuite/gcc.dg/pr96542.c b/gcc/testsuite/gcc.dg/pr96542.c index 5014f2acad8..0aad2e9494e 100644 --- a/gcc/testsuite/gcc.dg/pr96542.c +++ b/gcc/testsuite/gcc.dg/pr96542.c @@ -22,6 +22,6 @@ baz (unsigned int x) return (-1U >> x) * 16; } -/* { dg-final { scan-tree-dump-times "254" 2 "evrp" } } */ +/* { dg-final { scan-tree-dump-times "return 254" 2 "evrp" } } */ /* { dg-final { scan-tree-dump "= PHI <32.*, 4294967280" "evrp" } } */
[PATCH] Fix PR 106560: Another ICE after conflicting types of redeclaration
From: Andrew Pinski This another one of these ICE after error issues with the gimplifier and a fallout from r12-3278-g823685221de986af. The problem here is gimplify_modify_expr does not check if either from or to was an error operand. This adds the check and fixes the ICE. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * gimplify.cc (gimplify_modify_expr): If either *from_p or *to_p were error_operand return early. gcc/testsuite/ChangeLog: * gcc.dg/redecl-23.c: New test. * gcc.dg/redecl-24.c: New test. * gcc.dg/redecl-25.c: New test. --- gcc/gimplify.cc | 3 +++ gcc/testsuite/gcc.dg/redecl-23.c | 6 ++ gcc/testsuite/gcc.dg/redecl-24.c | 6 ++ gcc/testsuite/gcc.dg/redecl-25.c | 9 + 4 files changed, 24 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/redecl-23.c create mode 100644 gcc/testsuite/gcc.dg/redecl-24.c create mode 100644 gcc/testsuite/gcc.dg/redecl-25.c diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index c62a966e918..02415cb1b5c 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -6054,6 +6054,9 @@ gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, location_t loc = EXPR_LOCATION (*expr_p); gimple_stmt_iterator gsi; + if (error_operand_p (*from_p) || error_operand_p (*to_p)) +return GS_ERROR; + gcc_assert (TREE_CODE (*expr_p) == MODIFY_EXPR || TREE_CODE (*expr_p) == INIT_EXPR); diff --git a/gcc/testsuite/gcc.dg/redecl-23.c b/gcc/testsuite/gcc.dg/redecl-23.c new file mode 100644 index 000..425721df2ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/redecl-23.c @@ -0,0 +1,6 @@ +/* We used to ICE in the gimplifier, PR 106560. 
*/ +/* { dg-do compile } */ +/* { dg-options "-w" } */ +void **a; /* { dg-note "" } */ +void b() { void **c = a; } +a; /* { dg-error "" } */ diff --git a/gcc/testsuite/gcc.dg/redecl-24.c b/gcc/testsuite/gcc.dg/redecl-24.c new file mode 100644 index 000..f0f7a723ab8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/redecl-24.c @@ -0,0 +1,6 @@ +/* We used to ICE in the gimplifier, PR 106560 */ +/* { dg-do compile } */ +/* { dg-options "-w" } */ +void **a, **b; /* { dg-note "" } */ +c(){b = a;} +a = /* { dg-error "" } */ diff --git a/gcc/testsuite/gcc.dg/redecl-25.c b/gcc/testsuite/gcc.dg/redecl-25.c new file mode 100644 index 000..4232e19d9a7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/redecl-25.c @@ -0,0 +1,9 @@ +/* We used to ICE in the gimplifier, PR 106560 */ +/* { dg-do compile } */ +/* { dg-options "-w" } */ +void **a; /* { dg-note "" } */ +void b() { + void **c; +c = a /* { dg-error "" } */ +} +a; /* { dg-error "" } */ -- 2.27.0
Re: [Patch] gcn: Add __builtin_gcn_{get_stack_limit,first_call_this_thread_p}
On 19/11/2022 10:46, Tobias Burnus wrote: On 18.11.22 18:49, Andrew Stubbs wrote: On 18/11/2022 17:20, Tobias Burnus wrote: This looks wrong: + /* stackbase = (stack_segment_decr & 0x) + + stack_wave_offset); + seg_size = dispatch_ptr->private_segment_size; + stacklimit = stackbase + seg_size*64; (this should be '*seg_size' not 'seg_size' and the name should be s/seg_size/seg_size_ptr/.) Yes, looking again I think the comment is misleading, but the code has the MEM so the dereference is there. + with segsize = dispatch_ptr + 6*sizeof(int16_t) + 3*sizeof(int32_t); + cf. struct hsa_kernel_dispatch_packet_s in the HSA doc. */ + rtx ptr; + if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0 + && cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0) + { + rtx size_rtx = gen_rtx_REG (DImode, + cfun->machine->args.reg[DISPATCH_PTR_ARG]); + size_rtx = gen_rtx_MEM (DImode, + gen_rtx_PLUS (DImode, size_rtx, + GEN_INT (6*16 + 3*32))); + size_rtx = gen_rtx_MULT (DImode, size_rtx, GEN_INT (64)); + (Reading it, I think it should be '..._MEM(SImode,' and '..._MULT(SImode' instead of DImode.) Yes, I think you're right; the field is uint32. Admittedly, there is probably something not quite right as I see with gfx908 # of expected passes 27476 # of unexpected failures 317 where 317 FAIL comes from 88 testcase files. That's not a very high number but more than the usual fails, which shows that something is not quite right. * * * I am pretty sure that I missed something - but the question is what. I hope you can help me pinpoint the place where it goes wrong. This might be it: + if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0) + { + rtx off; + off = gen_rtx_REG (SImode, + cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]); + ptr = gen_rtx_PLUS (DImode, ptr, off); + } I think "off" needs to be zero-extended before you can add the SImode to DImode (same for the segment size, of course). Andrew
[committed] libstdc++: Add always_inline to trivial range access functions
Tested x86_64-linux. Pushed to trunk. -- >8 -- This makes all the [iterator.range] functions always-inline, except the ones that construct a std::reverse_iterator, as they do a little more work. They could probably be made always_inline too though, and maybe the std::reverse_iterator constructor too. This means that even for -O0 these functions have no runtime overhead compared with calling a member of the container, or performing pointer arithmetic for arrays. libstdc++-v3/ChangeLog: * include/bits/range_access.h: Add always_inline attribute to trivial functions. --- libstdc++-v3/include/bits/range_access.h | 53 +--- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/libstdc++-v3/include/bits/range_access.h b/libstdc++-v3/include/bits/range_access.h index 78fdfe66035..241f5417eec 100644 --- a/libstdc++-v3/include/bits/range_access.h +++ b/libstdc++-v3/include/bits/range_access.h @@ -47,7 +47,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto begin(_Container& __cont) -> decltype(__cont.begin()) { return __cont.begin(); } @@ -58,7 +58,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto begin(const _Container& __cont) -> decltype(__cont.begin()) { return __cont.begin(); } @@ -69,7 +69,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto end(_Container& __cont) -> decltype(__cont.end()) { return __cont.end(); } @@ -80,7 +80,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. 
*/ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto end(const _Container& __cont) -> decltype(__cont.end()) { return __cont.end(); } @@ -90,7 +90,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __arr Array. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX14_CONSTEXPR _Tp* begin(_Tp (&__arr)[_Nm]) noexcept { return __arr; } @@ -101,7 +101,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __arr Array. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX14_CONSTEXPR _Tp* end(_Tp (&__arr)[_Nm]) noexcept { return __arr + _Nm; } @@ -121,7 +121,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] constexpr auto cbegin(const _Container& __cont) noexcept(noexcept(std::begin(__cont))) -> decltype(std::begin(__cont)) @@ -133,7 +133,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] constexpr auto cend(const _Container& __cont) noexcept(noexcept(std::end(__cont))) -> decltype(std::end(__cont)) @@ -145,7 +145,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto rbegin(_Container& __cont) -> decltype(__cont.rbegin()) { return __cont.rbegin(); } @@ -156,7 +156,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto rbegin(const _Container& __cont) -> decltype(__cont.rbegin()) { return __cont.rbegin(); } @@ -167,7 +167,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. 
*/ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto rend(_Container& __cont) -> decltype(__cont.rend()) { return __cont.rend(); } @@ -178,7 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto rend(const _Container& __cont) -> decltype(__cont.rend()) { return __cont.rend(); } @@ -233,7 +233,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param __cont Container. */ template -[[__nodiscard__]] +[[__nodiscard__, __gnu__::__always_inline__]] inline _GLIBCXX17_CONSTEXPR auto crbegin(const _Container& __cont) -> decltype(std::rbegin(__cont)) { return std::rbegin(__cont); } @@ -244,7 +244,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @param
Re: [PATCH] RISC-V: branch-(not)equals-zero compares against $zero
On 11/17/22 21:53, Palmer Dabbelt wrote: On Thu, 17 Nov 2022 14:44:31 PST (-0800), jeffreya...@gmail.com wrote: On 11/8/22 12:55, Philipp Tomsich wrote: If we are testing a register or a paradoxical subreg (i.e. anything that is not a partial subreg) for equality/non-equality with zero, we can generate a branch that compares against $zero. This will work for QI, HI, SI and DImode, so we enable this for ANYI. 2020-08-30 gcc/ChangeLog: * config/riscv/riscv.md (*branch_equals_zero): Added pattern. I've gone back and forth on this a few times. As you know, I hate subregs in the target descriptions and I guess I need to extend that to querying if something is a subreg or not rather than just subregs appearing in the RTL. Presumably the idea behind rejecting partial subregs is the bits outside the partial are unspecified, but that's also going to be true if we're looking at a hardreg in QImode (for example) irrespective of it being wrapped in a subreg. I don't doubt it works the vast majority of the time, but I haven't been able to convince myself it'll work all the time. How do we ensure that the bits outside the mode are zero? I've been bitten by this kind of problem before, and it's safe to say it was exceedingly painful to find. I don't really understand the middle-end issues here (if there are any?), but I'm pretty sure code like this has passed by a few times before and we've yet to find a reliable way to optimize these cases. There's a bunch of patterns where knowing the XLEN-extension of shorter values would let us generate better code, but there's also cases where we'd generate worse code by ensuring any extension scheme is followed. It's not really the extension scheme, though that is a subset of the concerns in this space. Essentially we have to be 100% sure that the bits outside of the branch mode (QI/HI/SI) and XLEN are zero, it's not just the sign bit. This becomes even more of a concern as we exploit the bitmanip extensions more aggressively. 
The SUBREG check is supposed to avoid that problem, but I'm not convinced it's sufficient. Philipp claims that PROMOTE_MODE plus WORD_REGISTER_OPERATIONS is sufficient here, but I'm not sure that's the case. He's digging out the rationale from some internal archives which we'll dig into once he finds it. I'd be happy to be proved wrong :-) jeff
Re: [PATCH] constexprify some tree variables
On 11/18/22 19:53, Andrew Pinski wrote: On Fri, Nov 18, 2022 at 12:06 PM Jeff Law via Gcc-patches wrote: On 11/18/22 11:05, apinski--- via Gcc-patches wrote: From: Andrew Pinski Since we use C++11 by default now, we can use constexpr for some const decls in tree-core.h. This patch does that and it allows for better optimizations of GCC code with checking enabled and without LTO. For example, generic-match.cc compiling is sped up due to the smaller number of basic blocks and less debugging info produced. I did not check the speed of compiling the same source but rather the speed of compiling the old vs new sources here (but with the same compiler base). The small slow down in the parsing of the arrays in each TU is mitigated by a speed up in how much code/debugging info is produced in the end. Note I looked at generic-match.cc since it is one of the compiling sources which causes parallel building to stall and I wanted to speed it up. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. Or should this wait until GCC 13 branches off? gcc/ChangeLog: PR middle-end/14840 * tree-core.h (tree_code_type): Constexprify by including all-tree.def. (tree_code_length): Likewise. * tree.cc (tree_code_type): Remove. (tree_code_length): Remove. I would have preferred this a week ago :-) And if it was just const-ifying, I'd ACK it without hesitation. Yes, I know, which is why I am ok with waiting for GCC 14 really. I decided to try to clear out some of the old bug reports assigned to myself and this one was one of the oldest and also one of the easiest to do. Let's go ahead. It addresses a bug and should be quite safe. Jeff
[committed] libstdc++: Fix -Wsign-compare warnings in std::format
Tested x86_64-linux. Pushed to trunk. -- >8 -- libstdc++-v3/ChangeLog: * include/std/format: Fix -Wsign-compare warnings. --- libstdc++-v3/include/std/format | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index f4fc85a16d2..561ae161d16 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -291,7 +291,7 @@ namespace __format } else { - constexpr size_t __n = 32; + constexpr int __n = 32; char __buf[__n]{}; for (int __i = 0; __i < __n && __first != __last; ++__i) __buf[__i] = __first[__i]; @@ -1544,7 +1544,7 @@ namespace __format if (size_t __extras = int(__d == __p) + __z) { - if (__dynbuf.empty() && __extras <= (__end - __res.ptr)) + if (__dynbuf.empty() && __extras <= size_t(__end - __res.ptr)) { // Move exponent to make space for extra chars. __builtin_memmove(__start + __p + __extras, @@ -2357,7 +2357,7 @@ namespace __format _M_write(_CharT __c) { *_M_next++ = __c; - if (_M_next - _M_span.begin() == _M_span.size()) [[unlikely]] + if (_M_next - _M_span.begin() == std::ssize(_M_span)) [[unlikely]] _M_overflow(); } @@ -2469,7 +2469,7 @@ namespace __format auto __used = this->_M_used(); if (_M_max < 0) // No maximum. _M_out = ranges::copy(__used, std::move(_M_out)).out; - else if (_M_count < _M_max) + else if (_M_count < size_t(_M_max)) { auto __max = _M_max - _M_count; span<_CharT> __first; -- 2.38.1
[committed] libstdc++: Fix Doxygen warning
Tested x86_64-linux. Pushed to trunk. -- >8 -- This fixes a Doxygen warning about a mismatched parameter name. The standard uses 'r' here, like the Doxygen comment, so use '__r' instead of '__e'. libstdc++-v3/ChangeLog: * include/bits/ptr_traits.h (pointer_traits::pointer_to): Rename parameter. --- libstdc++-v3/include/bits/ptr_traits.h | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libstdc++-v3/include/bits/ptr_traits.h b/libstdc++-v3/include/bits/ptr_traits.h index 71370ff4fc9..b46a1ac34c9 100644 --- a/libstdc++-v3/include/bits/ptr_traits.h +++ b/libstdc++-v3/include/bits/ptr_traits.h @@ -104,17 +104,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief Obtain a pointer to an object * @param __r A reference to an object of type `element_type` - * @return `pointer::pointer_to(__e)` - * @pre `pointer::pointer_to(__e)` is a valid expression. + * @return `pointer::pointer_to(__r)` + * @pre `pointer::pointer_to(__r)` is a valid expression. */ static pointer - pointer_to(element_type& __e) + pointer_to(element_type& __r) #if __cpp_lib_concepts requires requires { - { pointer::pointer_to(__e) } -> convertible_to; + { pointer::pointer_to(__r) } -> convertible_to; } #endif - { return pointer::pointer_to(__e); } + { return pointer::pointer_to(__r); } }; // Do not define pointer_traits::pointer_to if element type is void. -- 2.38.1
[committed] libstdc++: Fix one more malformed requires-clause [PR107649]
Tested x86_64-linux. Pushed to trunk. -- >8 -- libstdc++-v3/ChangeLog: PR libstdc++/107649 * include/std/complex (__complex_proj): Fix requires-clause. --- libstdc++-v3/include/std/complex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/include/std/complex b/libstdc++-v3/include/std/complex index 86d5ca3c3e1..912adb78aaa 100644 --- a/libstdc++-v3/include/std/complex +++ b/libstdc++-v3/include/std/complex @@ -2528,7 +2528,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif template -requires(__complex_type<_Tp>::type) +requires requires { typename __complex_type<_Tp>::type; } inline complex<_Tp> __complex_proj(const complex<_Tp>& __z) { return __complex_proj(__z.__rep()); } -- 2.38.1
Re: [PATCH] Fix in _GLIBCXX_INLINE_VERSION mode
On Sat, 19 Nov 2022 at 13:03, François Dumont via Libstdc++ wrote: > > Without this qualification I have this in _GLIBCXX_INLINE_VERSION mode: > > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: > note: candidate: 'template bool std::__9::isxdigit(_CharT, > const locale&)' > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: > note: template argument deduction/substitution failed: > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1540: > note: candidate expects 2 arguments, 1 provided > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1630: > error: no matching function for call to 'isxdigit(const > std::__9::basic_string_view::value_type&)' > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: > note: candidate: 'template bool std::__9::isxdigit(_CharT, > const locale&)' > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: > note: template argument deduction/substitution failed: > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1630: > note: candidate expects 2 arguments, 1 provided > compiler exited with status 1 > FAIL: 17_intro/headers/c++2020/all_attributes.cc (test for excess errors) > Excess errors: > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1540: > error: no matching function for call to 'isxdigit(const > std::__9::basic_string_view::value_type&)' > /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1630: > error: no matching function for call to 'isxdigit(const > std::__9::basic_string_view::value_type&)' > > It sounds like the most reasonable fix as this is how toupper is being > called. 
I think the real problem is that include/c_global/cctype is missing the NAMESPACE_VERSION macros. All declarations of std::isxdigit etc should be in the same namespace, precisely so we don't need to do this. > > libstdc++: Add missing std qualification on isxdigit calls > > libstdc++-v3/ChangeLog > > * include/std/format: Add std qualification on isxdigit calls. > > Ok to commit ? Yes, OK.
[PATCH] Fix in _GLIBCXX_INLINE_VERSION mode
Without this qualification I have this in _GLIBCXX_INLINE_VERSION mode: /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: note: candidate: 'template bool std::__9::isxdigit(_CharT, const locale&)' /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: note: template argument deduction/substitution failed: /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1540: note: candidate expects 2 arguments, 1 provided /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1630: error: no matching function for call to 'isxdigit(const std::__9::basic_string_view::value_type&)' /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: note: candidate: 'template bool std::__9::isxdigit(_CharT, const locale&)' /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/locale_facets.h:2649: note: template argument deduction/substitution failed: /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1630: note: candidate expects 2 arguments, 1 provided compiler exited with status 1 FAIL: 17_intro/headers/c++2020/all_attributes.cc (test for excess errors) Excess errors: /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1540: error: no matching function for call to 'isxdigit(const std::__9::basic_string_view::value_type&)' /home/fdt/dev/gcc/build_versioned_ns/x86_64-pc-linux-gnu/libstdc++-v3/include/format:1630: error: no matching function for call to 'isxdigit(const std::__9::basic_string_view::value_type&)' It sounds like the most reasonable fix as this is how toupper is being called. libstdc++: Add missing std qualification on isxdigit calls libstdc++-v3/ChangeLog * include/std/format: Add std qualification on isxdigit calls. Ok to commit ? 
François diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index f4fc85a16d2..9f5b7bee2be 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -1537,7 +1537,7 @@ namespace __format if (__trailing_zeros) { - if (!isxdigit(__s[0])) + if (!std::isxdigit(__s[0])) --__sigfigs; __z = __prec - __sigfigs; } @@ -1627,7 +1627,7 @@ namespace __format { __fill_char = _CharT('0'); // Write sign before zero filling. - if (!isxdigit(__narrow_str[0])) + if (!std::isxdigit(__narrow_str[0])) { *__out++ = __str[0]; __str.remove_prefix(1);
Re: why does gcc jit require pthread?
在 2022-11-19 19:27, Jonathan Wakely 写道: I rebased the patch and re-tested with those options, and all tests passed again: === jit Summary === # of expected passes 15081 The patch is OK for trunk if you have favorable answers for the above two questions. Thanks, I've pushed it to trunk now. Thank you for taking care of it! -- Best regards, LIU Hao OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH RFA] libstdc++: add experimental Contracts support
On Sat, 19 Nov 2022 at 02:40, Jason Merrill wrote: > > Thanks, this is what I'm pushing: > Great. I wonder if we should move the contents of libstdc++fs.a and libstdc++_libbacktrace.a into libstdc++exp.a and make the former libraries into linker scripts that point to libstdc++exp.a. It would be easier to only have one lib for unstable things.
Re: [PATCH] 15/19 modula2 front end: cc1gm2 additional non modula2 source files
Richard Biener writes: >> +/* We don't use language_function. */ > > well ... oops, yes - I'll remove the comment! >> +struct GTY (()) language_function >> +{ >> + >> + /* While we are parsing the function, this contains information about >> + the statement-tree that we are building. */ >> + /* struct stmt_tree_s stmt_tree; */ >> + tree stmt_tree; > > ... but this? > >> +}; >> + >> +/* end of new stuff. */ >> + >> +/* Language hooks. */ >> + >> +bool >> +gm2_langhook_init (void) >> +{ >> + build_common_tree_nodes (false); >> + >> + /* I don't know why this has to be done explicitly. */ >> + void_list_node = build_tree_list (NULL_TREE, void_type_node); > > it's now done in build_common_tree_nodes thanks >> + build_common_builtin_nodes (); >> + >> + /* The default precision for floating point numbers. This is used >> + for floating point constants with abstract type. This may eventually >> + be controllable by a command line option. */ >> + mpfr_set_default_prec (256); >> + >> + /* GNU Modula-2 uses exceptions. */ >> + using_eh_for_cleanups (); >> + >> + return true; >> +} >> + >> +/* The option mask. */ >> + >> +static unsigned int >> +gm2_langhook_option_lang_mask (void) >> +{ >> + return CL_ModulaX2; >> +} >> + >> +/* Initialize the options structure. */ >> + >> +static void >> +gm2_langhook_init_options_struct (struct gcc_options *opts) >> +{ >> + /* Default to avoiding range issues for complex multiply and divide. */ >> + opts->x_flag_complex_method = 2; >> + >> + /* The builtin math functions should not set errno. */ >> + opts->x_flag_errno_math = 0; >> + opts->frontend_set_flag_errno_math = true; >> + >> + /* Exceptions are used to handle recovering from panics. */ >> + opts->x_flag_exceptions = 1; >> + opts->x_flag_non_call_exceptions = 1; > > whohoo - really non-call-exceptions? ah thankyou. Now removed, bootstrapped and regression tests pass. 
>> + init_FrontEndInit (); >> +} >> + [snip] >> +static tree >> +gm2_langhook_type_for_mode (machine_mode mode, int unsignedp) >> +{ >> + tree type; >> + >> + if (VECTOR_MODE_P (mode)) >> +{ >> + tree inner; >> + >> + inner = gm2_langhook_type_for_mode (GET_MODE_INNER (mode), unsignedp); >> + if (inner != NULL_TREE) >> +return build_vector_type_for_mode (inner, mode); >> + return NULL_TREE; >> +} >> + >> + scalar_int_mode imode; >> + scalar_float_mode fmode; >> + complex_mode cmode; >> + if (is_int_mode (mode, )) >> +return gm2_langhook_type_for_size (GET_MODE_BITSIZE (imode), unsignedp); >> + else if (is_float_mode (mode, )) >> +{ >> + switch (GET_MODE_BITSIZE (fmode)) >> +{ >> +case 32: >> + return float_type_node; >> +case 64: >> + return double_type_node; > > Have a look at lto/lto-lang.cc where we match the global types with thanks will do! > if (mode == TYPE_MODE (float_type_node)) > return float_type_node; > > I think that's better than relying on the size statically as you do > above. yes indeed >> +default: >> + // We have to check for long double in order to support >> + // i386 excess precision. >> + if (fmode == TYPE_MODE (long_double_type_node)) >> +return long_double_type_node; >> +} >> +} >> + else if (is_complex_float_mode (mode, )) >> +{ >> + switch (GET_MODE_BITSIZE (cmode)) >> +{ >> +case 64: >> + return complex_float_type_node; >> +case 128: >> + return complex_double_type_node; >> +default: >> + // We have to check for long double in order to support >> + // i386 excess precision. >> + if (cmode == TYPE_MODE (complex_long_double_type_node)) >> +return complex_long_double_type_node; >> +} >> +} >> + >> +#if HOST_BITS_PER_WIDE_INT >= 64 >> + >> + /* The middle-end and some backends rely on TImode being supported >> + for 64-bit HWI. 
*/ >> + if (mode == TImode) >> +{ >> + type = build_nonstandard_integer_type (GET_MODE_BITSIZE (TImode), >> + unsignedp); >> + if (type && TYPE_MODE (type) == TImode) >> +return type; >> +} >> +#endif > > Instead of this block look at c-family/c-common.cc which does > > for (i = 0; i < NUM_INT_N_ENTS; i ++) > if (int_n_enabled_p[i] > && mode == int_n_data[i].m) > return (unsignedp ? int_n_trees[i].unsigned_type > : int_n_trees[i].signed_type); ok will do - thanks for the direction. > it might be practical to factor out handling of the global tree nodes into > a function in the middle-end that frontends can call after processing modes > it has special types for. yes it sounds like a common front end use. >> + return NULL_TREE; >> +} >> + [snip] >> +/* m2_write_global_declarations writes out globals by coping into a vec >> + and calling wrapup_global_declarations. */ >>
Re: why does gcc jit require pthread?
On Tue, 15 Nov 2022 at 19:01, Jonathan Wakely wrote: > > On Tue, 15 Nov 2022 at 18:50, David Malcolm wrote: > > > > [Fixing typo in the Subject ("git" -> "jit" ); CCing jit mailing list] > > > > On Fri, 2022-11-11 at 17:16 +, Jonathan Wakely wrote: > > > On Mon, 7 Nov 2022 at 13:51, Jonathan Wakely wrote: > > > > > > > > On Mon, 7 Nov 2022 at 13:33, LIU Hao wrote: > > > > > > > > > > 在 2022-11-07 20:57, Jonathan Wakely 写道: > > > > > > It would be a lot nicer if playback::context met the C++ > > > > > > Lockable > > > > > > requirements, and playback::context::compile () could just take > > > > > > a > > > > > > scoped lock on *this: > > > > > > > > > > > > > > > > > > > > > > Yeah yeah that makes a lot of sense. Would you please just commit > > > > > that? I don't have write access to > > > > > GCC repo, and it takes a couple of hours for me to bootstrap GCC > > > > > just for this tiny change. > > > > > > > > Somebody else needs to approve it first. I'll combine our patches > > > > and > > > > test and submit it properly for approval. > > > > > > Here's a complete patch that actually builds now, although I'm seeing > > > a stage 2 vs stage 3 comparison error which I don't have time to look > > > into right now. > > > > I confess that I'm not familiar with C++11's mutex and locking types, > > but having read through the relevant entries on cppreference.com, the > > patch looks correct to me. > > > > Are these classes well-supported on the minimum compiler version we > > support? (Jonathan, I defer to your judgement here) > > std::mutex has been supported since 4.4.0 and is very simple. The > implementation on trunk is identical to the one in gcc 4.8.5 except > for adding 'noexcept' to mutex::native_handle (), which is not > relevant to this change. > > > Jonathan: you said in your followup email that it "bootstraps and > > passes testing on x86_64-pc-linux-gnu (CentOS 8 Stream)". 
This is > > possibly a silly question, but did this testing include the jit > > testsuite? A gotcha here is that --enable-languages=all does *not* > > enable jit. > > Yes, I built with --enable-languages=c,c++,jit --enable-host-shared I rebased the patch and re-tested with those options, and all tests passed again: === jit Summary === # of expected passes15081 > > The patch is OK for trunk if you have favorable answers for the above > > two questions. Thanks, I've pushed it to trunk now.
Re: [Patch] gcn: Add __builtin_gcn_{get_stack_limit,first_call_this_thread_p}
On 18.11.22 18:49, Andrew Stubbs wrote: On 18/11/2022 17:20, Tobias Burnus wrote: This looks wrong: +/* stackbase = (stack_segment_decr & 0x) ++ stack_wave_offset); + seg_size = dispatch_ptr->private_segment_size; + stacklimit = stackbase + seg_size*64; (this should be '*seg_size' not 'seg_size' and the name should be s/seg_size/seg_size_ptr/.) + with segsize = dispatch_ptr + 6*sizeof(int16_t) + 3*sizeof(int32_t); + cf. struct hsa_kernel_dispatch_packet_s in the HSA doc. */ +rtx ptr; +if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0 +&& cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0) + { +rtx size_rtx = gen_rtx_REG (DImode, + cfun->machine->args.reg[DISPATCH_PTR_ARG]); +size_rtx = gen_rtx_MEM (DImode, +gen_rtx_PLUS (DImode, size_rtx, + GEN_INT (6*16 + 3*32))); +size_rtx = gen_rtx_MULT (DImode, size_rtx, GEN_INT (64)); + (Reading it, I think it should be '..._MEM(SImode,' and '..._MULT(SImode' instead of DImode.) seg_size is calculated from the private_segment_size loaded from the dispatch_ptr, not calculated from the dispatch_ptr itself. Isn't this what the code tries to do? Namely: My understanding is that dispatch_ptr->private_segment_size == *((char*)dispatch_ptr + 192) And the latter is what I attempt to do. I have a very limited knowledge of insn/rtx/RTL and of GCN assembly; thus, I likely have done something stupid. Having said this, here is what I get: (Where asm("s4") == dispatch_ptr) s_add_u32 s2, s4, 192 s_addc_u32 s3, s5, 0 v_writelane_b32 v4, s2, 0 v_writelane_b32 v5, s3, 0 s_mov_b64 exec, 1 flat_load_dwordx2 v[4:5], v[4:5] s_waitcnt 0 v_lshlrev_b64 v[4:5], 6, v[4:5] v_readlane_b32 s2, v4, 0 v_readlane_b32 s3, v5, 0 Not that I really understand every line, but at a glance it looks okay. 
The 192 is because of (quoting newlib/libc/machine/amdgcn/getreent.c): typedef struct hsa_kernel_dispatch_packet_s { uint16_t header ; uint16_t setup; uint16_t workgroup_size_x ; uint16_t workgroup_size_y ; uint16_t workgroup_size_z; uint16_t reserved0; uint32_t grid_size_x ; uint32_t grid_size_y ; uint32_t grid_size_z; uint32_t private_segment_size; i.e. 6*16 + 3*32 = 192 – and we want to read a 32bit unsigned int. * * * Admittedly, there is probably something not quite right as I see with gfx908 # of expected passes 27476 # of unexpected failures 317 where the 317 FAILs come from 88 testcase files. That's not a very high number but more than the usual fails, which shows that something is not quite right. * * * I am pretty sure that I missed something - but the question is what. I hope you can help me pinpoint the place where it goes wrong. Thanks, Tobias - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [PATCH 2/5] c++: Set the locus of the function result decl
Hi Jason! Possible test. An existing test might be to equip the existing warning for bool unsigned double meh(void) {return 0;} with a fix-it hint instead of the brief error: two or more data types in declaration of ‘meh’. Likewise for bool unsigned meh(void) {return 0;} error: ‘unsigned’ specified with ‘bool’ so we wouldn't need a plugin, and it might even be useful? ;) cheers, * g++.dg/plugin/plugin.exp: Add new test. * g++.dg/plugin/result-decl-plugin-test-1.C: New test. * g++.dg/plugin/result-decl-plugin-test-2.C: New test. * g++.dg/plugin/result_decl_plugin.C: New test. --- gcc/testsuite/g++.dg/plugin/plugin.exp| 3 + .../g++.dg/plugin/result-decl-plugin-test-1.C | 28 + .../g++.dg/plugin/result-decl-plugin-test-2.C | 61 +++ .../g++.dg/plugin/result_decl_plugin.C| 57 + 4 files changed, 149 insertions(+) create mode 100644 gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-1.C create mode 100644 gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-2.C create mode 100644 gcc/testsuite/g++.dg/plugin/result_decl_plugin.C diff --git a/gcc/testsuite/g++.dg/plugin/plugin.exp b/gcc/testsuite/g++.dg/plugin/plugin.exp index b5fb42fa77a..f2b526b4704 100644 --- a/gcc/testsuite/g++.dg/plugin/plugin.exp +++ b/gcc/testsuite/g++.dg/plugin/plugin.exp @@ -80,6 +80,9 @@ set plugin_test_list [list \ show-template-tree-color-labels.C \ show-template-tree-color-no-elide-type.C } \ { comment_plugin.c comments-1.C } \ +{ result_decl_plugin.C \ + result-decl-plugin-test-1.C \ + result-decl-plugin-test-2.C } \ ] foreach plugin_test $plugin_test_list { diff --git a/gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-1.C b/gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-1.C new file mode 100644 index 000..bd323181d70 --- /dev/null +++ b/gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-1.C @@ -0,0 +1,28 @@ +/* Verify that class member functions result decl have the correct location. 
*/ +// { dg-options "-fdiagnostics-generate-patch" } +namespace std { template < typename, typename > struct pair; } +template < typename > struct __mini_vector +{ + int _M_finish; + const + unsigned long + __attribute__((deprecated)) + _M_space_left() + { return _M_finish != 0; } +}; + template class __mini_vector< std::pair< long, long > >; + template class __mini_vector< int >; +#if 0 +{ dg-begin-multiline-output "" } +@@ -5,7 +5,7 @@ template < typename > struct __mini_vect + { + int _M_finish; + const +- unsigned long ++ bool + __attribute__((deprecated)) + _M_space_left() + { return _M_finish != 0; } + +{ dg-end-multiline-output "" } +#endif diff --git a/gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-2.C b/gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-2.C new file mode 100644 index 000..385a7ef482f --- /dev/null +++ b/gcc/testsuite/g++.dg/plugin/result-decl-plugin-test-2.C @@ -0,0 +1,61 @@ +/* Verify that template functions result decl have the correct location. */ +// { dg-options "-fdiagnostics-generate-patch" } +template +int +f() +{ + return 42; +} +int main() +{ + f(); +} +unsigned long long huh(void) +{ + return 1ULL; +} +#if 0 +{ dg-begin-multiline-output "" } +g++.dg/plugin/result-decl-plugin-test-2.C:4:1: warning: Function ‘f’ result location +4 | int + | ^~~ + | bool +g++.dg/plugin/result-decl-plugin-test-2.C:9:1: warning: Function ‘main’ result location +9 | int main() + | ^~~ + | bool +g++.dg/plugin/result-decl-plugin-test-2.C:13:28: warning: Function ‘huh’ result location + 13 | unsigned long long huh(void) + |^ + |bool +g++.dg/plugin/result-decl-plugin-test-2.C: In instantiation of ‘int f() [with T = int]’: +g++.dg/plugin/result-decl-plugin-test-2.C:11:10: required from here +g++.dg/plugin/result-decl-plugin-test-2.C:4:1: warning: Function ‘f’ result location +4 | int + | ^~~ + | bool +--- g++.dg/plugin/result-decl-plugin-test-2.C g++.dg/plugin/result-decl-plugin-test-2.C +@@ -1,16 +1,16 @@ + /* Verify that template functions result 
decl have the correct location. */ + // { dg-options "-fdiagnostics-generate-patch" } + template +-int ++bool + f() + { + return 42; + } +-int main() ++bool main() + { +f(); + } +-unsigned long long huh(void) ++unsigned long long huh(voidbool + { + return 1ULL; + } +{ dg-end-multiline-output "" } +#endif +// Note: f() should not +bbool with an off-by-one for the start 'b' ! diff --git a/gcc/testsuite/g++.dg/plugin/result_decl_plugin.C b/gcc/testsuite/g++.dg/plugin/result_decl_plugin.C new file mode 100644 index 000..40f54a6acfe --- /dev/null +++ b/gcc/testsuite/g++.dg/plugin/result_decl_plugin.C @@ -0,0 +1,57 @@ +/* A plugin example that points at the location of function decl result decl */ +/* This file is part of GCC */ +/* { dg-options "-O" } */
[PATCH] reg-stack: Fix a -fcompare-debug bug in reg-stack [PR107183]
Hi! As the following testcase shows, the swap_rtx_condition function in reg-stack can result in different code generation between -g and -g0. The function is doing the changes as it goes, so does analysis and changes together, which makes it harder to deal with DEBUG_INSNs, where normally analysis phase ignores them and the later phase doesn't. swap_rtx_condition walks instructions two different ways, one is using next_flags_user function which stops on non-call instructions that mention the flags register, and the other is a loop on fnstsw where it stops on instructions mentioning it and tries to find sahf instruction that uses it (in both cases calls stop it and so does end of basic block). Now both of these currently stop on DEBUG_INSNs that mention the flags register resp. the fnstsw result register. On success the function recurses on next flags user instruction if still live and if the recursion failed, reverts the changes it did too and fails. If it were just for the next_flags_user case, the fix could be just not doing INSN_CODE (insn) = -1; if (recog_memoized (insn) == -1) fail = 1; on DEBUG_INSNs (assuming all changes to those are fine), swap_rtx_condition_1 just changes one comparison to a different one. But due to the possibility of fnstsw result being used in theory before sahf in some DEBUG_INSNs, this patch takes a different approach. swap_rtx_condition has now a new argument and two modes. The first mode is when debug_seen is >= 0, in this case both next_flags_user and the loop for fnstsw -> sahf will ignore but note DEBUG_INSNs (that mention flags register or fnstsw result). If no such DEBUG_INSN is found during the whole call including recursive invocations (so e.g. for -g0 but probably most often for -g as well), it behaves as before, if it returns true all the changes are done and nothing further needs to be done later. 
If any DEBUG_INSNs are seen along the way, even when returning success all the changes are reverted, so it just reports that the function would be successful if DEBUG_INSNs were ignored. In this case, compare_for_stack_reg needs to call it again in debug_seen = -1 mode, which tells the function to update everything including DEBUG_INSNs. For the fnstsw -> sahf case which I hope will be very rare I just reset the DEBUG_INSNs, I don't really know how to express it easily otherwise. For the rest swap_rtx_condition_1 is done even on the DEBUG_INSNs. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? And after some time for release branches too? 2022-11-19 Jakub Jelinek PR target/107183 * reg-stack.cc (next_flags_user): Add DEBUG_SEEN argument. If >= 0 and a DEBUG_INSN would be otherwise returned, set DEBUG_SEEN to 1 and ignore it. (swap_rtx_condition): Add DEBUG_SEEN argument. In >= 0 mode only set DEBUG_SEEN to 1 if problematic DEBUG_INSNs were seen and revert all changes on success in that case. Don't try to recog_memoized DEBUG_INSNs. (compare_for_stack_reg): Adjust swap_rtx_condition caller. If it returns true and debug_seen is 1, call swap_rtx_condition again with debug_seen -1. * gcc.dg/ubsan/pr107183.c: New test. 
--- gcc/reg-stack.cc.jj 2022-09-14 12:21:39.594178160 +0200 +++ gcc/reg-stack.cc 2022-11-18 16:54:43.090381887 +0100 @@ -263,14 +263,14 @@ static void swap_to_top (rtx_insn *, sta static bool move_for_stack_reg (rtx_insn *, stack_ptr, rtx); static bool move_nan_for_stack_reg (rtx_insn *, stack_ptr, rtx); static int swap_rtx_condition_1 (rtx); -static int swap_rtx_condition (rtx_insn *); +static int swap_rtx_condition (rtx_insn *, int &); static void compare_for_stack_reg (rtx_insn *, stack_ptr, rtx, bool); static bool subst_stack_regs_pat (rtx_insn *, stack_ptr, rtx); static void subst_asm_stack_regs (rtx_insn *, stack_ptr); static bool subst_stack_regs (rtx_insn *, stack_ptr); static void change_stack (rtx_insn *, stack_ptr, stack_ptr, enum emit_where); static void print_stack (FILE *, stack_ptr); -static rtx_insn *next_flags_user (rtx_insn *); +static rtx_insn *next_flags_user (rtx_insn *, int &); /* Return nonzero if any stack register is mentioned somewhere within PAT. */ @@ -336,7 +336,7 @@ stack_regs_mentioned (const_rtx insn) static rtx ix86_flags_rtx; static rtx_insn * -next_flags_user (rtx_insn *insn) +next_flags_user (rtx_insn *insn, int &debug_seen) { /* Search forward looking for the first use of this value. Stop at block boundaries. */ @@ -346,7 +346,14 @@ next_flags_user (rtx_insn *insn) insn = NEXT_INSN (insn); if (INSN_P (insn) && reg_mentioned_p (ix86_flags_rtx, PATTERN (insn))) - return insn; + { + if (DEBUG_INSN_P (insn) && debug_seen >= 0) + { + debug_seen = 1; + continue; + } + return insn; + } if (CALL_P (insn)) return NULL; @@ -1248,8 +1255,22 @@
Re: [PATCH] i386: Outline fast BF -> SF conversion and fix up sNaN handling in it [PR107628]
On Sat, Nov 19, 2022 at 9:53 AM Jakub Jelinek wrote: > > On Fri, Oct 21, 2022 at 10:23:14AM +0200, Uros Bizjak wrote: > > OK, but now we have two more copies of a function that effectively > > extends BF to SF. Can you please split this utility function out and > > use it here and in cbranchbf4/cstorebf4? I'm talking about this part: > > > > + op = gen_lowpart (HImode, op1); > > + if (CONST_INT_P (op)) > > + op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > > +op1, BFmode); > > + else > > + { > > + rtx t1 = gen_reg_rtx (SImode); > > + emit_insn (gen_zero_extendhisi2 (t1, op)); > > + emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); > > + op = gen_lowpart (SFmode, t1); > > + } > > > > Taking this a bit further, it looks like a generic function to extend > > BF to SF, when extendbfsf2 named function is not defined. > > > > The above could be a follow-up patch, the proposed patch is OK. > > Sorry for the delay, only got to this now. > And I'm fixing the sNaN handling in it too. If the argument is a BFmode sNaN > constant, we want in this case just a SFmode sNaN constant, but > simplify_const_unary_operation (FLOAT_EXTEND, ...) > in that case returns NULL (as normally conversions of a sNaN to some > other float type should raise an exception). In this case we want > to bypass that, as we know the sNaN will be used immediately in the SFmode > comparison a few instructions later. The patch fixes it by just > simplifying the lowpart to HImode and its zero extension to SImode, then > force into a pseudo and do the left shift and subreg to SFmode on the > pseudo. CSE or combine can handle it later. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2022-11-19 Jakub Jelinek > > PR target/107628 > * config/i386/i386-protos.h (ix86_expand_fast_convert_bf_to_sf): > Declare. > * config/i386/i386-expand.cc (ix86_expand_fast_convert_bf_to_sf): New > function. > * config/i386/i386.md (cbranchbf4, cstorebf4): Use it. 
> > * gcc.target/i386/pr107628.c: New test. OK. Thanks, Uros. > > --- gcc/config/i386/i386-protos.h.jj2022-10-10 09:31:57.234987578 +0200 > +++ gcc/config/i386/i386-protos.h 2022-11-18 12:21:26.975706528 +0100 > @@ -227,6 +227,7 @@ extern void ix86_expand_atomic_fetch_op_ > bool, bool); > extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, > bool, rtx_code_label *); > +extern rtx ix86_expand_fast_convert_bf_to_sf (rtx); > > #ifdef TREE_CODE > extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); > --- gcc/config/i386/i386-expand.cc.jj 2022-11-11 08:15:45.452186618 +0100 > +++ gcc/config/i386/i386-expand.cc 2022-11-18 12:35:16.646193028 +0100 > @@ -24138,4 +24138,30 @@ ix86_expand_cmpxchg_loop (rtx *ptarget_b >*ptarget_bool = target_bool; > } > > +/* Convert a BFmode VAL to SFmode without signaling sNaNs. > + This is done by returning SF SUBREG of ((HI SUBREG) (VAL)) << 16. */ > + > +rtx > +ix86_expand_fast_convert_bf_to_sf (rtx val) > +{ > + rtx op = gen_lowpart (HImode, val), ret; > + if (CONST_INT_P (op)) > +{ > + ret = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > + val, BFmode); > + if (ret) > + return ret; > + /* FLOAT_EXTEND simplification will fail if VAL is a sNaN. 
*/ > + ret = gen_reg_rtx (SImode); > + emit_move_insn (ret, GEN_INT (INTVAL (op) & 0xffff)); > +} > + else > +{ > + ret = gen_reg_rtx (SImode); > + emit_insn (gen_zero_extendhisi2 (ret, op)); > +} > + emit_insn (gen_ashlsi3 (ret, ret, GEN_INT (16))); > + return gen_lowpart (SFmode, ret); > +} > + > #include "gt-i386-expand.h" > --- gcc/config/i386/i386.md.jj 2022-11-07 10:30:42.727630162 +0100 > +++ gcc/config/i386/i386.md 2022-11-18 12:22:25.172898912 +0100 > @@ -1668,28 +1668,8 @@ (define_expand "cbranchbf4" > (pc)))] >"" > { > - rtx op1 = gen_lowpart (HImode, operands[1]); > - if (CONST_INT_P (op1)) > -op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > - operands[1], BFmode); > - else > -{ > - rtx t1 = gen_reg_rtx (SImode); > - emit_insn (gen_zero_extendhisi2 (t1, op1)); > - emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); > - op1 = gen_lowpart (SFmode, t1); > -} > - rtx op2 = gen_lowpart (HImode, operands[2]); > - if (CONST_INT_P (op2)) > -op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > - operands[2], BFmode); > - else > -{ > - rtx t2 = gen_reg_rtx (SImode); > - emit_insn (gen_zero_extendhisi2 (t2, op2)); > - emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16))); > - op2 = gen_lowpart
[PATCH] i386: Outline fast BF -> SF conversion and fix up sNaN handling in it [PR107628]
On Fri, Oct 21, 2022 at 10:23:14AM +0200, Uros Bizjak wrote: > OK, but now we have two more copies of a function that effectively > extends BF to SF. Can you please split this utility function out and > use it here and in cbranchbf4/cstorebf4? I'm talking about this part: > > + op = gen_lowpart (HImode, op1); > + if (CONST_INT_P (op)) > + op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, > +op1, BFmode); > + else > + { > + rtx t1 = gen_reg_rtx (SImode); > + emit_insn (gen_zero_extendhisi2 (t1, op)); > + emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); > + op = gen_lowpart (SFmode, t1); > + } > > Taking this a bit further, it looks like a generic function to extend > BF to SF, when extendbfsf2 named function is not defined. > > The above could be a follow-up patch, the proposed patch is OK. Sorry for the delay, only got to this now. And I'm fixing the sNaN handling in it too. If the argument is a BFmode sNaN constant, we want in this case just a SFmode sNaN constant, but simplify_const_unary_operation (FLOAT_EXTEND, ...) in that case returns NULL (as normally conversions of a sNaN to some other float type should raise an exception). In this case we want to bypass that, as we know the sNaN will be used immediately in the SFmode comparison a few instructions later. The patch fixes it by just simplifying the lowpart to HImode and its zero extension to SImode, then force into a pseudo and do the left shift and subreg to SFmode on the pseudo. CSE or combine can handle it later. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-11-19 Jakub Jelinek PR target/107628 * config/i386/i386-protos.h (ix86_expand_fast_convert_bf_to_sf): Declare. * config/i386/i386-expand.cc (ix86_expand_fast_convert_bf_to_sf): New function. * config/i386/i386.md (cbranchbf4, cstorebf4): Use it. * gcc.target/i386/pr107628.c: New test. 
--- gcc/config/i386/i386-protos.h.jj2022-10-10 09:31:57.234987578 +0200 +++ gcc/config/i386/i386-protos.h 2022-11-18 12:21:26.975706528 +0100 @@ -227,6 +227,7 @@ extern void ix86_expand_atomic_fetch_op_ bool, bool); extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, bool, rtx_code_label *); +extern rtx ix86_expand_fast_convert_bf_to_sf (rtx); #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); --- gcc/config/i386/i386-expand.cc.jj 2022-11-11 08:15:45.452186618 +0100 +++ gcc/config/i386/i386-expand.cc 2022-11-18 12:35:16.646193028 +0100 @@ -24138,4 +24138,30 @@ ix86_expand_cmpxchg_loop (rtx *ptarget_b *ptarget_bool = target_bool; } +/* Convert a BFmode VAL to SFmode without signaling sNaNs. + This is done by returning SF SUBREG of ((HI SUBREG) (VAL)) << 16. */ + +rtx +ix86_expand_fast_convert_bf_to_sf (rtx val) +{ + rtx op = gen_lowpart (HImode, val), ret; + if (CONST_INT_P (op)) +{ + ret = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, + val, BFmode); + if (ret) + return ret; + /* FLOAT_EXTEND simplification will fail if VAL is a sNaN. 
*/ + ret = gen_reg_rtx (SImode); + emit_move_insn (ret, GEN_INT (INTVAL (op) & 0xffff)); +} + else +{ + ret = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (ret, op)); +} + emit_insn (gen_ashlsi3 (ret, ret, GEN_INT (16))); + return gen_lowpart (SFmode, ret); +} + #include "gt-i386-expand.h" --- gcc/config/i386/i386.md.jj 2022-11-07 10:30:42.727630162 +0100 +++ gcc/config/i386/i386.md 2022-11-18 12:22:25.172898912 +0100 @@ -1668,28 +1668,8 @@ (define_expand "cbranchbf4" (pc)))] "" { - rtx op1 = gen_lowpart (HImode, operands[1]); - if (CONST_INT_P (op1)) -op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, - operands[1], BFmode); - else -{ - rtx t1 = gen_reg_rtx (SImode); - emit_insn (gen_zero_extendhisi2 (t1, op1)); - emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); - op1 = gen_lowpart (SFmode, t1); -} - rtx op2 = gen_lowpart (HImode, operands[2]); - if (CONST_INT_P (op2)) -op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, - operands[2], BFmode); - else -{ - rtx t2 = gen_reg_rtx (SImode); - emit_insn (gen_zero_extendhisi2 (t2, op2)); - emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16))); - op2 = gen_lowpart (SFmode, t2); -} + rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]); + rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]); do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0, SFmode, NULL_RTX, NULL, as_a
[PATCH] i386: Uglify some local identifiers in *intrin.h [PR107748]
Hi! While reporting PR107748 (where is a problem with non-uglified names, but I've left it out because it needs fixing anyway), I've noticed various spots where identifiers in *intrin.h headers weren't uglified. The following patch fixed those that are related to unions (I've grepped for [a-zA-Z]\.[a-zA-Z] spots). The reason we need those to be uglified is the same as why the arguments of the inlines are __ prefixed and most of automatic vars in the inlines - say a, v or u aren't part of implementation namespace and so users could #define u whatever->something #include and it should still work, as long as u is not e.g. one of the names of the functions/macros the header provides (_mm* etc.). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-11-19 Jakub Jelinek PR target/107748 * config/i386/avx512fp16intrin.h (_mm512_castph512_ph128, _mm512_castph512_ph256, _mm512_castph128_ph512, _mm512_castph256_ph512, _mm512_set1_pch): Uglify names of local variables and union members. * config/i386/avx512fp16vlintrin.h (_mm256_castph256_ph128, _mm256_castph128_ph256, _mm256_set1_pch, _mm_set1_pch): Likewise. * config/i386/smmintrin.h (_mm_extract_ps): Likewise. 
--- gcc/config/i386/avx512fp16intrin.h.jj 2022-09-27 08:03:26.974984702 +0200 +++ gcc/config/i386/avx512fp16intrin.h 2022-11-18 12:51:10.668957336 +0100 @@ -272,10 +272,10 @@ _mm512_castph512_ph128 (__m512h __A) { union { -__m128h a[4]; -__m512h v; - } u = { .v = __A }; - return u.a[0]; +__m128h __a[4]; +__m512h __v; + } __u = { .__v = __A }; + return __u.__a[0]; } extern __inline __m256h @@ -284,10 +284,10 @@ _mm512_castph512_ph256 (__m512h __A) { union { -__m256h a[2]; -__m512h v; - } u = { .v = __A }; - return u.a[0]; +__m256h __a[2]; +__m512h __v; + } __u = { .__v = __A }; + return __u.__a[0]; } extern __inline __m512h @@ -296,11 +296,11 @@ _mm512_castph128_ph512 (__m128h __A) { union { -__m128h a[4]; -__m512h v; - } u; - u.a[0] = __A; - return u.v; +__m128h __a[4]; +__m512h __v; + } __u; + __u.__a[0] = __A; + return __u.__v; } extern __inline __m512h @@ -309,11 +309,11 @@ _mm512_castph256_ph512 (__m256h __A) { union { -__m256h a[2]; -__m512h v; - } u; - u.a[0] = __A; - return u.v; +__m256h __a[2]; +__m512h __v; + } __u; + __u.__a[0] = __A; + return __u.__v; } extern __inline __m512h @@ -7156,11 +7156,11 @@ _mm512_set1_pch (_Float16 _Complex __A) { union { -_Float16 _Complex a; -float b; - } u = { .a = __A}; +_Float16 _Complex __a; +float __b; + } __u = { .__a = __A}; - return (__m512h) _mm512_set1_ps (u.b); + return (__m512h) _mm512_set1_ps (__u.__b); } // intrinsics below are alias for f*mul_*ch --- gcc/config/i386/avx512fp16vlintrin.h.jj 2022-01-11 23:11:21.760299007 +0100 +++ gcc/config/i386/avx512fp16vlintrin.h2022-11-18 12:52:23.242951737 +0100 @@ -124,10 +124,10 @@ _mm256_castph256_ph128 (__m256h __A) { union { -__m128h a[2]; -__m256h v; - } u = { .v = __A }; - return u.a[0]; +__m128h __a[2]; +__m256h __v; + } __u = { .__v = __A }; + return __u.__a[0]; } extern __inline __m256h @@ -136,11 +136,11 @@ _mm256_castph128_ph256 (__m128h __A) { union { -__m128h a[2]; -__m256h v; - } u; - u.a[0] = __A; - return u.v; +__m128h __a[2]; +__m256h __v; + } __u; + 
__u.__a[0] = __A; + return __u.__v; } extern __inline __m256h @@ -3317,11 +3317,11 @@ _mm256_set1_pch (_Float16 _Complex __A) { union { -_Float16 _Complex a; -float b; - } u = { .a = __A }; +_Float16 _Complex __a; +float __b; + } __u = { .__a = __A }; - return (__m256h) _mm256_set1_ps (u.b); + return (__m256h) _mm256_set1_ps (__u.__b); } extern __inline __m128h @@ -3330,11 +3330,11 @@ _mm_set1_pch (_Float16 _Complex __A) { union { -_Float16 _Complex a; -float b; - } u = { .a = __A }; +_Float16 _Complex __a; +float __b; + } __u = { .__a = __A }; - return (__m128h) _mm_set1_ps (u.b); + return (__m128h) _mm_set1_ps (__u.__b); } // intrinsics below are alias for f*mul_*ch --- gcc/config/i386/smmintrin.h.jj 2022-04-19 07:20:56.429171229 +0200 +++ gcc/config/i386/smmintrin.h 2022-11-18 12:53:26.226079037 +0100 @@ -365,17 +365,18 @@ _mm_insert_ps (__m128 __D, __m128 __S, c extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_ps (__m128 __X, const int __N) { - union { int i; float f; } __tmp; - __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N); - return __tmp.i; + union { int __i; float __f; } __tmp; + __tmp.__f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N); + return __tmp.__i; } #else #define _mm_extract_ps(X, N) \ (__extension__ \ ({