[patch, libfortran] Fortran 2018: Support d0.d, e0.d, es0.d, en0.d, g0.d and ew.d e0 edit descriptors

2019-12-29 Thread Jerry

Hi all,

The attached patch includes adjustments to the test case.

The Fortran Standard states that using the e0 exponent specifier results 
in the smallest possible exponent width.  This patch implements that 
case.


I got frustrated with trying to re-understand this code segment and even 
found some dead code in there.  As a result I did some major refactoring 
of the code and separated out the zero width, positive width, and no 
width DEC extensions into their own chunks.  I also added comments in 
hopes of helping others follow what this is doing and how it works.


This patch resolves some parsing issues currently on trunk where a 
format specifier following the e0.d.e specifier would result in an error 
(comments 12 and 20 of the PR). These are fixed.


The patch, as it is, passes regression testing but I must confess I may 
not have all the DEC stuff right yet and I propose we commit the patch 
here and address any DEC stuff as a follow up. (I will be looking at the 
DEC stuff in the next few days.)


OK for trunk?

Regards,

Jerry
diff --git a/gcc/testsuite/gfortran.dg/fmt_zero_width.f90 b/gcc/testsuite/gfortran.dg/fmt_zero_width.f90
index 640b6735c65..db2cca6e28a 100644
--- a/gcc/testsuite/gfortran.dg/fmt_zero_width.f90
+++ b/gcc/testsuite/gfortran.dg/fmt_zero_width.f90
@@ -9,32 +9,34 @@ program pr90374
   rn = 0.00314_4
   afmt = "(D0.3)"
   write (aresult,fmt=afmt) rn
-  if (aresult /= "0.314D-02") stop 12
+  if (aresult /= "0.314D-2") stop 12
   afmt = "(E0.10)"
   write (aresult,fmt=afmt) rn
-  if (aresult /= "0.313928E-02") stop 15
+  if (aresult /= "0.313928E-2") stop 15
   afmt = "(ES0.10)"
   write (aresult,fmt=afmt) rn
-  if (aresult /= "3.139280E-03") stop 18
+  if (aresult /= "3.139280E-3") stop 18
   afmt = "(EN0.10)"
   write (aresult,fmt=afmt) rn
-  if (aresult /= "3.139280E-03") stop 21
+  if (aresult /= "3.139280E-3") stop 21
   afmt = "(G0.10)"
   write (aresult,fmt=afmt) rn
-  if (aresult /= "0.313928E-02") stop 24
+  if (aresult /= "0.313928E-2") stop 24
   afmt = "(E0.10e0)"
   write (aresult,fmt=afmt) rn
-  if (aresult /= "0.313928E-02") stop 27
+  if (aresult /= "0.313928E-2") stop 27
   write (aresult,fmt="(D0.3)") rn
-  if (aresult /= "0.314D-02") stop 29
+  if (aresult /= "0.314D-2") stop 29
   write (aresult,fmt="(E0.10)") rn
-  if (aresult /= "0.313928E-02") stop 31
+  if (aresult /= "0.313928E-2") stop 31
   write (aresult,fmt="(ES0.10)") rn
-  if (aresult /= "3.139280E-03") stop 33
+  if (aresult /= "3.139280E-3") stop 33
   write (aresult,fmt="(EN0.10)") rn
-  if (aresult /= "3.139280E-03") stop 35
+  if (aresult /= "3.139280E-3") stop 35
   write (aresult,fmt="(G0.10)") rn
-  if (aresult /= "0.313928E-02") stop 37
+  if (aresult /= "0.313928E-2") stop 37
   write (aresult,fmt="(E0.10e0)") rn
-  if (aresult /= "0.313928E-02") stop 39
+  if (aresult /= "0.313928E-2") stop 39
+  write (aresult,fmt="(E0.10e3)") rn
+  if (aresult /= ".313928E-002") stop 41
 end
diff --git a/libgfortran/io/format.c b/libgfortran/io/format.c
index 0b23721c055..1406e46693a 100644
--- a/libgfortran/io/format.c
+++ b/libgfortran/io/format.c
@@ -38,7 +38,7 @@ static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
 
 /* Error messages. */
 
-static const char posint_required[] = "Positive width required in format",
+static const char posint_required[] = "Positive integer required in format",
   period_required[] = "Period required in format",
   nonneg_required[] = "Nonnegative width required in format",
   unexpected_element[] = "Unexpected element '%c' in format\n",
@@ -925,6 +925,8 @@ parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
   tail->repeat = repeat;
 
   u = format_lex (fmt);
+  
+  /* Processing for zero width formats.  */
   if (u == FMT_ZERO)
 	{
 	  *seen_dd = true;
@@ -935,6 +937,8 @@ parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
 	  goto finished;
 	}
 	  tail->u.real.w = 0;
+
+	  /* Look for the dot seperator.  */
 	  u = format_lex (fmt);
 	  if (u != FMT_PERIOD)
 	{
@@ -942,108 +946,120 @@ parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
 	  break;
 	}
 
+	  /* Look for the precision.  */
 	  u = format_lex (fmt);
-	  if (u != FMT_POSINT)
-	notify_std (>common, GFC_STD_F2003,
-			"Positive width required");
+	  if (u != FMT_ZERO && u != FMT_POSINT)
+	{
+	  fmt->error = nonneg_required;
+	  goto finished;
+	}
 	  tail->u.real.d = fmt->value;
-	  break;
-	}
-  if (t == FMT_F && dtp->u.p.mode == WRITING)
-	{
-	  *seen_dd = true;
-	  if (u != FMT_POSINT && u != FMT_ZERO)
+	  
+	  /* Look for optional exponent */
+	  u = format_lex (fmt);
+	  if (u != FMT_E)
+	fmt->saved_token = u;
+	  else
 	{
-	  if (dtp->common.flags & IOPARM_DT_DEC_EXT)
+	  u = format_lex (fmt);
+	  if (u != FMT_POSINT)
 		{
-		  tail->u.real.w = DEFAULT_WIDTH;
-		  

Re: *ping**2 Re: [Patch, Fortran] OpenMP/OpenACC – fix more issues with OPTIONAL

2019-12-29 Thread Jerry
Between Holidays and being short on people that understand this, I would 
say commit it unless Jakub objects.


(The "when in doubt, make a decision and move forward" principle, 
assuming one is not stupid.)


Cheers,

Jerry

On 12/29/19 2:27 PM, Tobias Burnus wrote:


On 12/16/19 9:06 AM, Tobias Burnus wrote:

Ping.

On 12/10/19 6:54 PM, Tobias Burnus wrote:
Nonallocatable, nonpointer array arguments (of assumed shape) are 
special as they get an array descriptor ('arg') as argument but 
create a local variable which accesses the actual data ('arg.0 = 
arg->data').


With OPTIONAL, there are/were two outstanding issues:

(A) If the argument is not present, 'arg.0' is/was never assigned to.

(B) The optional-arg-is-present check is not just 'if (arg)' but 'if 
(arg && arg->data)' as passing an unallocated 
allocatable/disassociated pointer (i.e. 'arg->data = NULL') to a 
nonpointer, nonallocatable optional dummy argument counts as absent 
argument; this affects (A).


Solution: 




Re: *ping*[patch, fortran] Fix PR 91541, ICE on valid for INDEX

2019-12-29 Thread Jerry

On 12/29/19 2:16 AM, Thomas Koenig wrote:

Am 19.12.19 um 08:23 schrieb Thomas Koenig:


Regression-tested. OK for trunk?


Ping?


This looks good Thomas,

Thanks for the patch,

Jerry


Re: [patch, fortran] Updated fix PR 92961, ICE on division by zero error in array bounds

2019-12-29 Thread Jerry

This one looks OK Thomas

Cheers,

Jerry

On 12/22/19 7:28 AM, Thomas Koenig wrote:

Hello world,

here is an update for the fix for PR 92961, which also takes care
of the second test case in the PR (included in the first one).

The patch itself should be clear enough - make sure that there
is a MATCH_ERROR on matching an array spec which contains 0/(0).
Rather than pass around information several calls deep, I chose
to use a global variable.

Regression-tested. OK for trunk?

(Only a few bugs to fix to be at least below 900 bugs at the end
of the year, by the way - we are at 389 submitted bugs vs. 461 closed,
which is not bad).

Regards

 Thomas

2019-12-22  Thomas Koenig  

 PR fortran/92961
 * gfortran.h (gfc_seen_div0): Add declaration.
 * arith.h (gfc_seen_div0): Add definition.
 (eval_intrinsic): For integer division by zero, set gfc_seen_div0.
 * decl.c (variable_decl):  If resolution resp. simplification
 fails for array spec and a division by zero error has been
 seen, return MATCH_ERROR.

2019-12-22  Thomas Koenig  

 PR fortran/92961
 * gfortran.dg/arith_divide_2.f90: New test.




Re: [C++ PATCH] PR c++/92745 - bogus error when initializing array of vectors.

2019-12-29 Thread Jakub Jelinek
On Fri, Dec 20, 2019 at 06:22:10PM -0500, Marek Polacek wrote:
> > > 2019-12-20  Marek Polacek  
> > > 
> > >   PR c++/92745 - bogus error when initializing array of vectors.
> > >   * decl.c (reshape_init_r): For a nested compound literal, do
> > >   call reshape_init_{class,array,vector}.
> > > 
> > >   * g++.dg/cpp0x/initlist118.C: New test.

I've missed that the testcase FAILs on i686-linux, with
/home/jakub/src/gcc/gcc/testsuite/g++.dg/cpp0x/initlist118.C: In function 
'array<__vector(4) float, 4> transpose(array<__vector(4) float, 4>)':
/home/jakub/src/gcc/gcc/testsuite/g++.dg/cpp0x/initlist118.C:20:28: warning: 
SSE vector return without SSE enabled changes the ABI [-Wpsabi]
/home/jakub/src/gcc/gcc/testsuite/g++.dg/cpp0x/initlist118.C:17:1: note: the 
ABI for passing parameters with 16-byte alignment has changed in GCC 4.6
FAIL: g++.dg/cpp0x/initlist118.C  -std=c++11 (test for excess errors)
Excess errors:
/home/jakub/src/gcc/gcc/testsuite/g++.dg/cpp0x/initlist118.C:20:28: warning: 
SSE vector return without SSE enabled changes the ABI [-Wpsabi]
etc.

Fixed thusly, tested on {x86_64,i686}-linux, committed to trunk as obvious.
The -w in there is for targets which don't use -Wno-psabi for their
ABI warnings.

2019-12-30  Jakub Jelinek  

PR c++/92745
* g++.dg/cpp0x/initlist118.C: Add -Wno-psabi -w to dg-options.

--- gcc/testsuite/g++.dg/cpp0x/initlist118.C.jj 2019-12-21 00:50:54.405407491 
+0100
+++ gcc/testsuite/g++.dg/cpp0x/initlist118.C2019-12-30 00:15:59.429857194 
+0100
@@ -1,5 +1,6 @@
 // PR c++/92745 - bogus error when initializing array of vectors.
 // { dg-do compile { target c++11 } }
+// { dg-options "-Wno-psabi -w" }
 
 template  struct c {
   typedef a d[b];


Jakub



[PATCH] Fix vextract* masked patterns (PR target/93069)

2019-12-29 Thread Jakub Jelinek
Hi!

The AVX512F documentation clearly states that in instructions where the
destination is a memory only merging-masking is possible, not zero-masking,
and the assembler enforces that.

The testcase in this patch fails to assemble because of
Error: unsupported masking for `vextracti32x8'
on
vextracti32x8   $0x0, %zmm1, -64(%rsp){%k1}{z}
For the vector extraction patterns, we apparently have 7 *_maskm patterns
that only accept memory destinations and rtx_equal_p merge-masking source
for it, 7 * corresponding patterns that allow memory destination
only for the non-masked cases (through ), then 2
* patterns (lo ssehalf V16FI and lo ssehalf VI8F_256 ones) which
do allow memory destination even for masked cases and are the cause of the
testsuite failure, because we must not allow C constraint if the destination
is m, and finally one pair of patterns (separate * and *_mask, hi ssehalf
VI4F_256), which has another issue (for which I don't have a testcase
though), where if it would match zero-masking with register destination,
it wouldn't emit the needed {z} into assembly.
The attached patch fixes those 3 issues only, perhaps more suitable for
backporting.
But, even with that fixed, we are missing 3 further *_maskm patterns and
more importantly, I find the split into 3 separate patterns after subst,
*_maskm for masking with memory destination, *_mask for masking with
register destination and * for non-masking unnecessarily complex and harder
for reload, so the included patch below (non-attached) instead kills all
*_maskm patterns and splits the * patterns into * and *_mask
by hand instead of subst, where the *_mask ones make sure that with v
destination they use 0C, while with m destination they use 0 and as
condition enforce that either destination is not MEM, or rtx_equal_p between
the destination and corresponding merging-masking operand source.
If we had those 3 missing *_maskm patterns, this patch would actually result
in both shorter sse.md and shorter machine description after subst (e.g.
length of tmp-mddump.md), as we don't have them, the patch is actually 16
lines longer sse.md, but still shorter tmp-mddump.md.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk (and is
the shorter patch ok for backports)?

2019-12-30  Jakub Jelinek  

PR target/93069
* config/i386/subst.md (store_mask_constraint, store_mask_predicate):
Remove.
(avx512dq_vextract64x2_1_maskm,
avx512f_vextract32x4_1_maskm,
vec_extract_lo__maskm, vec_extract_hi__maskm): Remove.
(avx512dq_vextract64x2_1): Split
into ...
(*avx512dq_vextract64x2_1,
avx512dq_vextract64x2_1_mask): ... these new
define_insns.  Even in the masked variant allow memory output but in
that case use 0 rather than 0C constraint on the source of masked-out
elts.
(avx512f_vextract32x4_1): Split
into ...
(*avx512f_vextract32x4_1,
avx512f_vextract32x4_1_mask): ... these new define_insns.
Even in the masked variant allow memory output but in that case use
0 rather than 0C constraint on the source of masked-out elts.
(vec_extract_lo_): Split into ...
(vec_extract_lo_, vec_extract_lo__mask): ... these new
define_insns.  Even in the masked variant allow memory output but in
that case use 0 rather than 0C constraint on the source of masked-out
elts.
(vec_extract_hi_): Split into ...
(vec_extract_hi_, vec_extract_hi__mask): ... these new
define_insns.  Even in the masked variant allow memory output but in
that case use 0 rather than 0C constraint on the source of masked-out
elts.

* gcc.target/i386/avx512vl-pr93069.c: New test.
* gcc.dg/vect/pr93069.c: New test.

--- gcc/config/i386/subst.md.jj 2019-10-28 22:16:14.651007061 +0100
+++ gcc/config/i386/subst.md2019-12-28 14:43:56.654042070 +0100
@@ -57,8 +57,6 @@ (define_subst_attr "mask_mode512bit_cond
 (define_subst_attr "mask_avx512vl_condition" "mask" "1" "TARGET_AVX512VL")
 (define_subst_attr "mask_avx512bw_condition" "mask" "1" "TARGET_AVX512BW")
 (define_subst_attr "mask_avx512dq_condition" "mask" "1" "TARGET_AVX512DQ")
-(define_subst_attr "store_mask_constraint" "mask" "vm" "v")
-(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" 
"register_operand")
 (define_subst_attr "mask_prefix" "mask" "vex" "evex")
 (define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
 (define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex,evex")
--- gcc/config/i386/sse.md.jj   2019-12-27 18:16:48.146431083 +0100
+++ gcc/config/i386/sse.md  2019-12-29 12:36:33.232414154 +0100
@@ -8415,60 +8415,31 @@ (define_expand "_vextract<
   DONE;
 })
 
-(define_insn "avx512dq_vextract64x2_1_maskm"
-  [(set (match_operand: 0 "memory_operand" "=m")
+(define_insn "avx512dq_vextract64x2_1_mask"
+  [(set (match_operand: 0 "nonimmediate_operand" "=v,m")
   

*ping**2 Re: [Patch, Fortran] OpenMP/OpenACC – fix more issues with OPTIONAL

2019-12-29 Thread Tobias Burnus



On 12/16/19 9:06 AM, Tobias Burnus wrote:

Ping.

On 12/10/19 6:54 PM, Tobias Burnus wrote:
Nonallocatable, nonpointer array arguments (of assumed shape) are 
special as they get an array descriptor ('arg') as argument but 
create a local variable which accesses the actual data ('arg.0 = 
arg->data').


With OPTIONAL, there are/were two outstanding issues:

(A) If the argument is not present, 'arg.0' is/was never assigned to.

(B) The optional-arg-is-present check is not just 'if (arg)' but 'if 
(arg && arg->data)' as passing an unallocated 
allocatable/disassociated pointer (i.e. 'arg->data = NULL') to a 
nonpointer, nonallocatable optional dummy argument counts as absent 
argument; this affects (A).


Solution: 


Re: [PATCH] PR tree-optimization/90836 Missing popcount pattern matching

2019-12-29 Thread Andrew Pinski
On Mon, Oct 7, 2019 at 3:05 AM Richard Biener
 wrote:
>
> On Tue, Oct 1, 2019 at 1:48 PM Dmitrij Pochepko
>  wrote:
> >
> > Hi Richard,
> >
> > I updated patch according to all your comments.
> > Also bootstrapped and tested again on x86_64-pc-linux-gnu and 
> > aarch64-linux-gnu, which took some time.
> >
> > attached v3.
>
> OK.

This introduced PR 93098 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93098 ).

Thanks,
Andrew Pinski

>
> Thanks,
> Richard.
>
> > Thanks,
> > Dmitrij
> >
> > On Thu, Sep 26, 2019 at 09:47:04AM +0200, Richard Biener wrote:
> > > On Tue, Sep 24, 2019 at 5:29 PM Dmitrij Pochepko
> > >  wrote:
> > > >
> > > > Hi,
> > > >
> > > > can anybody take a look at v2?
> > >
> > > +(if (tree_to_uhwi (@4) == 1
> > > + && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4
> > >
> > > those will still ICE for large __int128_t constants.  Since you do not 
> > > match
> > > any conversions you should probably restrict the precision of 'type' like
> > > with
> > >(if (TYPE_PRECISION (type) <= 64
> > > && tree_to_uhwi (@4) ...
> > >
> > > likewise tree_to_uhwi will fail for negative constants thus if the
> > > pattern assumes
> > > unsigned you should verify that as well with && TYPE_UNSIGNED  (type).
> > >
> > > Your 'argtype' is simply 'type' so you can elide it.
> > >
> > > +   (switch
> > > +   (if (types_match (argtype, long_long_unsigned_type_node))
> > > + (convert (BUILT_IN_POPCOUNTLL:integer_type_node @0)))
> > > +   (if (types_match (argtype, long_unsigned_type_node))
> > > + (convert (BUILT_IN_POPCOUNTL:integer_type_node @0)))
> > > +   (if (types_match (argtype, unsigned_type_node))
> > > + (convert (BUILT_IN_POPCOUNT:integer_type_node @0)))
> > >
> > > Please test small types first so we can avoid popcountll when long == 
> > > long long
> > > or long == int.  I also wonder if we really want to use the builtins and
> > > check optab availability or if we nowadays should use
> > > direct_internal_fn_supported_p (IFN_POPCOUNT, integer_type_node, type,
> > > OPTIMIZE_FOR_BOTH) and
> > >
> > > (convert (IFN_POPCOUNT:type @0))
> > >
> > > without the switch?
> > >
> > > Thanks,
> > > Richard.
> > >
> > > > Thanks,
> > > > Dmitrij
> > > >
> > > > On Mon, Sep 09, 2019 at 10:03:40PM +0300, Dmitrij Pochepko wrote:
> > > > > Hi all.
> > > > >
> > > > > Please take a look at v2 (attached).
> > > > > I changed patch according to review comments. The same testing was 
> > > > > performed again.
> > > > >
> > > > > Thanks,
> > > > > Dmitrij
> > > > >
> > > > > On Thu, Sep 05, 2019 at 06:34:49PM +0300, Dmitrij Pochepko wrote:
> > > > > > This patch adds matching for Hamming weight (popcount) 
> > > > > > implementation. The following sources:
> > > > > >
> > > > > > int
> > > > > > foo64 (unsigned long long a)
> > > > > > {
> > > > > > unsigned long long b = a;
> > > > > > > b -= ((b>>1) & 0x5555555555555555ULL);
> > > > > > > b = ((b>>2) & 0x3333333333333333ULL) + (b & 
> > > > > > > 0x3333333333333333ULL);
> > > > > > b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL;
> > > > > > b *= 0x0101010101010101ULL;
> > > > > > return (int)(b >> 56);
> > > > > > }
> > > > > >
> > > > > > and
> > > > > >
> > > > > > int
> > > > > > foo32 (unsigned int a)
> > > > > > {
> > > > > > unsigned long b = a;
> > > > > > > b -= ((b>>1) & 0x55555555UL);
> > > > > > > b = ((b>>2) & 0x33333333UL) + (b & 0x33333333UL);
> > > > > > b = ((b>>4) + b) & 0x0F0F0F0FUL;
> > > > > > b *= 0x01010101UL;
> > > > > > return (int)(b >> 24);
> > > > > > }
> > > > > >
> > > > > > and equivalents are now recognized as popcount for platforms with 
> > > > > > hw popcount support. Bootstrapped and tested on x86_64-pc-linux-gnu 
> > > > > > and aarch64-linux-gnu systems with no regressions.
> > > > > >
> > > > > > (I have no write access to repo)
> > > > > >
> > > > > > Thanks,
> > > > > > Dmitrij
> > > > > >
> > > > > >
> > > > > > gcc/ChangeLog:
> > > > > >
> > > > > > PR tree-optimization/90836
> > > > > >
> > > > > > * gcc/match.pd (popcount): New pattern.
> > > > > >
> > > > > > gcc/testsuite/ChangeLog:
> > > > > >
> > > > > > PR tree-optimization/90836
> > > > > >
> > > > > > * lib/target-supports.exp (check_effective_target_popcount)
> > > > > > (check_effective_target_popcountll): New effective targets.
> > > > > > * gcc.dg/tree-ssa/popcount4.c: New test.
> > > > > > * gcc.dg/tree-ssa/popcount4l.c: New test.
> > > > > > * gcc.dg/tree-ssa/popcount4ll.c: New test.
> > > > >
> > > > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > > > index 0317bc7..b1867bf 100644
> > > > > > --- a/gcc/match.pd
> > > > > > +++ b/gcc/match.pd
> > > > > > @@ -5358,6 +5358,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > > > >(cmp (popcount @0) integer_zerop)
> > > > > >(rep @0 { build_zero_cst (TREE_TYPE (@0)); }
> > > > > >
> > > > > > +/* 64- and 32-bits branchless 

Re: C++ PATCH for c++/88337 - Implement P1327R1: Allow dynamic_cast in constexpr

2019-12-29 Thread Marek Polacek
On Sat, Dec 21, 2019 at 04:50:41PM -0500, Jason Merrill wrote:
> On 12/17/19 5:34 PM, Marek Polacek wrote:
> > +  /* [class.cdtor]/6 "If the operand of the dynamic_cast refers to
> > + the object under construction or destruction and the static type
> > + of the operand is not a pointer to or object of the constructor
> > + or destructor's own class or one of its bases, the dynamic_cast
> > + results in undefined behavior."  And undefined behavior should be
> > + detected in constexpr contexts.  */
> > +  if (!same_type_ignoring_top_level_qualifiers_p (mdtype, complete_type))
> > +{
> > +  unsigned ix;
> > +  FOR_EACH_VEC_ELT_REVERSE (call_stack, ix, t)
> > +   if (tree fn = cp_get_callee_fndecl_nofold (t))
> > + if (DECL_CONSTRUCTOR_P (fn))
> > +   {
> > + /* Get *this of the current constructor.  */
> > + tree cdtor_type = initialized_type (t);
> > + if (!DERIVED_FROM_P (objtype, cdtor_type))
> 
> Walking the call_stack is an interesting idea, since ctx only has the
> innermost call, which might not be the relevant constructor.  And the
> innermost call might not even have a pointer to the object under
> construction.

Exactly.

> But this only finds that there is an object under construction, not whether
> obj is part of the same object.  

That is true also.  :/

> It should be possible to construct a
> testcase where we start constructing one object X, and then pass a pointer
> to X to the constructor for Y; doing a dynamic_cast of the X pointer
> shouldn't give an error just because it isn't a base of Y, since the X
> pointer points to the X under construction, not the Y under construction.
> 
> Something like
> 
> struct X;
> struct Y {
>   virtual void f();
>   Y(X* x) { dynamic_cast(x); } // returns NULL
> };
> struct X
> {
>   virtual void f();
>   X() { Y(this); }
> };
> struct Z: X
> {
>   virtual void f();
> } z;
> 
> Note that in constexpr-dynamic17.C, if you reverse the order of "D: A, B" to
> "D: B, A", we hit undefined behavior in the cast because the A vptr isn't
> set yet (though the diagnostic could be better).  A better way to detect
> this undefined behavior for the A, B case might be to clear the vptrs for A
> after we're done constructing it; the most derived constructor will set them
> again once base constructors are done.  This could happen either in
> emit_mem_initializers or in cxx_expand_call_expression.
> 
> But I think let's leave that for a follow-on patch.  Let's drop this hunk
> and reverse the bases in constexpr-dynamic17.C as I mentioned above so we
> still get an error.  OK with that change.

Agreed; I've opened PR93096.  Here's what I've committed after another
bootstrap/regtest:

commit 9d3f24adb6d09184fd348ef8d92e6d0b965e3f00
Author: mpolacek 
Date:   Sun Dec 29 16:44:41 2019 +

PR c++/88337 - Implement P1327R1: Allow dynamic_cast in constexpr.

This patch implements P1327R1 (allow dynamic_cast in constexpr).

When build_dynamic_cast realizes that a dynamic_cast needs a run-time 
check, it
generates a call to __dynamic_cast -- see dyncast.cc in libsupc++ for its
definition.  The gist of my approach is to evaluate such a call at compile 
time.

* constexpr.c (cxx_dynamic_cast_fn_p): New function.
(extract_obj_from_addr_offset): New function.
(get_component_with_type): New function.
(cxx_eval_dynamic_cast_fn): New function.
(cxx_eval_call_expression): Call cxx_eval_dynamic_cast_fn for a call
to __dynamic_cast.
(potential_constant_expression_1): Don't give up on
cxx_dynamic_cast_fn_p.
* rtti.c (build_dynamic_cast_1): When creating a call to
__dynamic_cast, use the location of the original expression.

* g++.dg/cpp2a/constexpr-dynamic1.C: New test.
* g++.dg/cpp2a/constexpr-dynamic10.C: New test.
* g++.dg/cpp2a/constexpr-dynamic11.C: New test.
* g++.dg/cpp2a/constexpr-dynamic12.C: New test.
* g++.dg/cpp2a/constexpr-dynamic13.C: New test.
* g++.dg/cpp2a/constexpr-dynamic14.C: New test.
* g++.dg/cpp2a/constexpr-dynamic15.C: New test.
* g++.dg/cpp2a/constexpr-dynamic16.C: New test.
* g++.dg/cpp2a/constexpr-dynamic17.C: New test.
* g++.dg/cpp2a/constexpr-dynamic2.C: New test.
* g++.dg/cpp2a/constexpr-dynamic3.C: New test.
* g++.dg/cpp2a/constexpr-dynamic4.C: New test.
* g++.dg/cpp2a/constexpr-dynamic5.C: New test.
* g++.dg/cpp2a/constexpr-dynamic6.C: New test.
* g++.dg/cpp2a/constexpr-dynamic7.C: New test.
* g++.dg/cpp2a/constexpr-dynamic8.C: New test.
* g++.dg/cpp2a/constexpr-dynamic9.C: New test.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@279755 
138bc75d-0d04-0410-961f-82ee72b054a4

diff 

Re: [PATCH] libstdc++: Define std::lexicographical_compare_three_way for C++20

2019-12-29 Thread Stephan Bergmann

On 05/12/2019 13:46, Jonathan Wakely wrote:

commit 5012548fd62526fdf5e04aeacee2b127efbac0e0
Author: Jonathan Wakely 
Date:   Thu Dec 5 12:23:53 2019 +

libstdc++: Define std::lexicographical_compare_three_way for C++20

* include/bits/stl_algobase.h (lexicographical_compare_three_way):

Define for C++20.
* testsuite/25_algorithms/lexicographical_compare_three_way/1.cc: 
New
test.
* testsuite/25_algorithms/lexicographical_compare_three_way/
constexpr.cc: New test.

diff --git a/libstdc++-v3/include/bits/stl_algobase.h 
b/libstdc++-v3/include/bits/stl_algobase.h
index 98d324827ed..a2fd306e6d0 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h

[...]

@@ -1456,6 +1459,104 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
 __gnu_cxx::__ops::__iter_comp_iter(__comp));
 }
 
+#if __cpp_lib_three_way_comparison

+#if __cpp_lib_concepts
+  // Iter points to a contiguous range of unsigned narrow character type
+  // or std::byte, suitable for comparison by memcmp.
+  template
+concept __is_byte_iter = contiguous_iterator<_Iter>
+  && __is_byte>::__value != 0
+  && !__gnu_cxx::__numeric_traits>::__is_signed;
+
+  // Return a struct with two members, initialized to the smaller of x and y
+  // (or x if they compare equal) and the result of the comparison x <=> y.
+  template
+constexpr auto
+__min_cmp(_Tp __x, _Tp __y)
+{
+  struct _Res {
+   _Tp _M_min;
+   decltype(__x <=> __y) _M_cmp;
+  };
+  auto __c = __x <=> __y;
+  if (__c > 0)
+   return _Res{__y, __c};
+  return _Res{__x, __c};
+}
+#endif

[...]

+
+  template
+constexpr auto
+lexicographical_compare_three_way(_InputIter1 __first1,
+ _InputIter1 __last1,
+ _InputIter2 __first2,
+ _InputIter2 __last2)
+{
+  return std::lexicographical_compare_three_way(__first1, __last1,
+   __first2, __last2,
+   compare_three_way{});


FYI, the above fails with -std=c++2a and recent Clang trunk after 
 
"Mark the major papers for C++20 consistent comparisons as 'done', and 
start publishing the corresponding feature-test macro":  Clang now 
defines __cpp_impl_three_way_comparison (so 
libstdc++-v3/include/std/version defines __cpp_lib_three_way_comparison) 
but still doesn't define __cpp_lib_concepts, so 
libstdc++-v3/libsupc++/compare doesn't define compare_three_way.


I worked around that locally for now by extending the surrounding

 #if __cpp_lib_three_way_comparison

with

 && __cpp_lib_concepts


+}
+#endif // three_way_comparison




Re: [PATCH] Allow {nearby,r}int{,f} vectorization on x86 with sse4.1 and later (PR target/93078)

2019-12-29 Thread Jakub Jelinek
On Sat, Dec 28, 2019 at 02:20:09PM +0100, Uros Bizjak wrote:
> > The conditions are:
> > (define_expand "nearbyint2"
> >   [(use (match_operand:MODEF 0 "register_operand"))
> >(use (match_operand:MODEF 1 "nonimmediate_operand"))]
> >   "(TARGET_USE_FANCY_MATH_387
> > && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
> >   || TARGET_MIX_SSE_I387)
> > && !flag_trapping_math)
> >|| (TARGET_SSE4_1 && TARGET_SSE_MATH)"
> > and:
> > (define_expand "rint2"
> >   [(use (match_operand:MODEF 0 "register_operand"))
> >(use (match_operand:MODEF 1 "nonimmediate_operand"))]
> >   "TARGET_USE_FANCY_MATH_387
> >|| (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
> > Only nearbyint tests flag_trapping_math, and only for the pre-sse4.1 case,
> 
> This is correct, since x87 frndint always generates precision
> (inexact) exceptions, but nearbyint should not generate any.
> 
> On a related note, trap on denormal is not IEEE exception, and
> documentation explicitly says that -fno-trapping-math affects only
> division by zero, overflow, underflow, inexact result and invalid
> operation. So, do we need to check for flag_trapping_math in
> ix86_builtin_vectorized_function for other builtins involving ROUND
> insn? Also, perhaps floor/ceil/trunc can be reimplemented using
> standard named expander instead.

I'd say we should follow what we do in the scalar code because if users
don't complain about that, it should be fine for vectorized code too.
And yes, reimplementing floor/ceil/trunc is something I'll try to do
incrementally, while it will be less important than rint which didn't have
the 512-bit cases implemented, it will still result in fewer decls that need
to be created.

> Your patch with stuff removed from ix86_builtin_vectorized_function is OK.

Thanks, here is what I've committed after another bootstrap/regtest:

2019-12-29  Jakub Jelinek  

PR target/93078
* config/i386/i386-builtins.c (ix86_builtin_vectorized_function):
Remove CASE_CFN_RINT handling.
* config/i386/i386-builtin.def (IX86_BUILTIN_RINTPD,
IX86_BUILTIN_RINTPS, IX86_BUILTIN_RINTPD256, IX86_BUILTIN_RINTPS256):
Remove.
* config/i386/sse.md (nearbyint2, rint2): New expanders
with VF iterator.

* gcc.target/i386/sse4_1-pr93078.c: New test.
* gcc.target/i386/avx-pr93078.c: New test.
* gcc.target/i386/avx512f-pr93078.c: New test.

--- gcc/config/i386/i386-builtins.c.jj  2019-12-09 15:02:31.077273254 +0100
+++ gcc/config/i386/i386-builtins.c 2019-12-28 12:11:05.509289523 +0100
@@ -1661,27 +1661,6 @@ ix86_builtin_vectorized_function (unsign
}
   break;
 
-CASE_CFN_RINT:
-  /* The round insn does not trap on denormals.  */
-  if (flag_trapping_math || !TARGET_SSE4_1)
-   break;
-
-  if (out_mode == DFmode && in_mode == DFmode)
-   {
- if (out_n == 2 && in_n == 2)
-   return ix86_get_builtin (IX86_BUILTIN_RINTPD);
- else if (out_n == 4 && in_n == 4)
-   return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
-   }
-  if (out_mode == SFmode && in_mode == SFmode)
-   {
- if (out_n == 4 && in_n == 4)
-   return ix86_get_builtin (IX86_BUILTIN_RINTPS);
- else if (out_n == 8 && in_n == 8)
-   return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
-   }
-  break;
-
 CASE_CFN_FMA:
   if (out_mode == DFmode && in_mode == DFmode)
{
--- gcc/config/i386/i386-builtin.def.jj 2019-12-09 15:02:31.110272755 +0100
+++ gcc/config/i386/i386-builtin.def2019-12-28 12:07:10.352821780 +0100
@@ -913,7 +913,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_F
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, 
"__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, 
(int) V2DF_FTYPE_V2DF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, 
"__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) 
V2DF_FTYPE_V2DF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, 
"__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, 
(int) V2DF_FTYPE_V2DF_ROUND)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd, 
"__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, 
(int) V2DF_FTYPE_V2DF_ROUND)
 
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, 
"__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, 
(enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, 
"__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum 
rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND)
@@ -924,7 +923,6 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_F
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
"__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, 
(int) 

*ping*[patch, fortran] Fix PR 91541, ICE on valid for INDEX

2019-12-29 Thread Thomas Koenig

Am 19.12.19 um 08:23 schrieb Thomas Koenig:


Regression-tested. OK for trunk?


Ping?


*ping* [patch, fortran] Updated fix PR 92961, ICE on division by zero error in array bounds

2019-12-29 Thread Thomas Koenig

Am 22.12.19 um 16:28 schrieb Thomas Koenig:


here is an update for the fix for PR 92961, which also takes care
of the second test case in the PR (included in the first one).

The patch itself should be clear enough - make sure that there
is a MATCH_ERROR on matching an array spec which contains 0/(0).
Rather than pass around information several calls deep, I chose
to use a global variable.

Regression-tested. OK for trunk?


Ping?

I'd like to get the bug count down to at least 902 in the old year
(if 900 cannot be achieved :-)

Regards

Thomas