[Ada] Redundant comparison to True
This patch corrects the placement of an error message concerning a redundant comparison to True. The patch also add machinery to explain the nature of the redundant True. -- Source -- -- main.adb procedure Main is type Rec (Discr : Boolean) is null record; function Self (Flag : Boolean) return Boolean is begin return Flag; end Self; Obj : constant Rec := Rec'(Discr = True); begin if Self (True) = Obj.Discr then null; end if; end Main; -- Compilation and output -- $ gcc -c -gnatwa main.adb main.adb:12:19: warning: comparison with True is redundant main.adb:12:25: warning: discriminant Discr is always True Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Hristian Kirtchev kirtc...@adacore.com * sem_res.adb (Explain_Redundancy): New routine. (Resolve_Equality_Op): Place the error concerning a redundant comparison to True at the =. Try to explain the nature of the redundant True. Index: sem_res.adb === --- sem_res.adb (revision 198234) +++ sem_res.adb (working copy) @@ -6821,6 +6821,11 @@ -- impose an expected type (as can be the case in an equality operation) -- the expression must be rejected. + procedure Explain_Redundancy (N : Node_Id); + -- Attempt to explain the nature of a redundant comparison with True. If + -- the expression N is too complex, this routine issues a general error + -- message. + function Find_Unique_Access_Type return Entity_Id; -- In the case of allocators and access attributes, the context must -- provide an indication of the specific access type to be used. 
If @@ -6850,6 +6855,72 @@ end if; end Check_If_Expression; + + -- Explain_Redundancy -- + + + procedure Explain_Redundancy (N : Node_Id) is + Error : Name_Id; + Val: Node_Id; + Val_Id : Entity_Id; + + begin + Val := N; + + -- Strip the operand down to an entity + + loop +if Nkind (Val) = N_Selected_Component then + Val := Selector_Name (Val); +else + exit; +end if; + end loop; + + -- The construct denotes an entity + + if Is_Entity_Name (Val) and then Present (Entity (Val)) then +Val_Id := Entity (Val); + +-- Do not generate an error message when the comparison is done +-- against the enumeration literal Standard.True. + +if Ekind (Val_Id) /= E_Enumeration_Literal then + + -- Build a customized error message + + Name_Len := 0; + Add_Str_To_Name_Buffer (?r?); + + if Ekind (Val_Id) = E_Component then + Add_Str_To_Name_Buffer (component ); + + elsif Ekind (Val_Id) = E_Constant then + Add_Str_To_Name_Buffer (constant ); + + elsif Ekind (Val_Id) = E_Discriminant then + Add_Str_To_Name_Buffer (discriminant ); + + elsif Is_Formal (Val_Id) then + Add_Str_To_Name_Buffer (parameter ); + + elsif Ekind (Val_Id) = E_Variable then + Add_Str_To_Name_Buffer (variable ); + end if; + + Add_Str_To_Name_Buffer ( is always True!); + Error := Name_Find; + + Error_Msg_NE (Get_Name_String (Error), Val, Val_Id); +end if; + + -- The construct is too complex to disect, issue a general message + + else +Error_Msg_N (?r?expression is always True!, Val); + end if; + end Explain_Redundancy; + - -- Find_Unique_Access_Type -- - @@ -6979,12 +7050,13 @@ if Warn_On_Redundant_Constructs and then Comes_From_Source (N) + and then Comes_From_Source (R) and then Is_Entity_Name (R) and then Entity (R) = Standard_True - and then Comes_From_Source (R) then Error_Msg_N -- CODEFIX - (?r?comparison with True is redundant!, R); + (?r?comparison with True is redundant!, N); +Explain_Redundancy (Original_Node (R)); end if; Check_Unset_Reference (L);
[Ada] gnatfind and source file names on Windows
On Windows, when gnatfind is called with a pattern and a source file name that includes capital letters, as in gnatfind Put_Line:A-textio.ads -r, gnatfind does not give the same output that it would have if the file was not capitalized (gnatfind Put_Line:a-textio.ads -r). This is corrected by this patch. Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Vincent Celier cel...@adacore.com * xref_lib.adb (Add_Entity): Use the canonical file names so that source file names with capital letters are found on platforms where file names are case insensitive. Index: xref_lib.adb === --- xref_lib.adb(revision 198221) +++ xref_lib.adb(working copy) @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 1998-2012, Free Software Foundation, Inc. -- +-- Copyright (C) 1998-2013, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -272,18 +272,21 @@ end if; end if; - File_Ref := -Add_To_Xref_File - (Entity (File_Start .. Line_Start - 1), Visited = True); - Pattern.File_Ref := File_Ref; + declare + File_Name : String := Entity (File_Start .. Line_Start - 1); + begin + Osint.Canonical_Case_File_Name (File_Name); + File_Ref := Add_To_Xref_File (File_Name, Visited = True); + Pattern.File_Ref := File_Ref; - Add_Line (Pattern.File_Ref, Line_Num, Col_Num); + Add_Line (Pattern.File_Ref, Line_Num, Col_Num); - File_Ref := -Add_To_Xref_File - (ALI_File_Name (Entity (File_Start .. Line_Start - 1)), - Visited = False, - Emit_Warning = True); + File_Ref := + Add_To_Xref_File + (ALI_File_Name (File_Name), + Visited = False, + Emit_Warning = True); + end; end Add_Entity; ---
[Ada] Generation of routine _Postconditions
This patch suppresses the creation of routine _Postconditions when the related context lacks invariants or predicates and all postcondition aspect / pragmas are disabled. -- Source -- -- main.adb procedure Main is X : Integer := 0; procedure P with Post = X 0; procedure P is begin null; end P; begin P; end Main; -- Compilation and output -- $ gcc -c -gnat12 -gnatdg -gnatd.V main.adb Source recreated from tree for Main (body) -- procedure main is x : integer := 0; procedure main__p with post = x 0; pragma postcondition (check = x 0, message = failed postcondition from main.adb:5); procedure main__p is begin null; return; end main__p; begin main__p; return; end main; Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Hristian Kirtchev kirtc...@adacore.com * sem_ch6.adb (Contains_Enabled_Pragmas): New routine. (Process_PPCs): Generate procedure _Postconditions only when the context has invariants or predicates or enabled aspects/pragmas. Index: sem_ch6.adb === --- sem_ch6.adb (revision 198234) +++ sem_ch6.adb (working copy) @@ -11196,6 +11196,10 @@ -- under the same visibility conditions as for other invariant checks, -- the type invariant must be applied to the returned value. + function Contains_Enabled_Pragmas (L : List_Id) return Boolean; + -- Determine whether list L has at least one enabled pragma. The routine + -- ignores other non-pragma elements. + procedure Expand_Contract_Cases (CCs : Node_Id; Subp_Id : Entity_Id); -- Given pragma Contract_Cases CCs, create the circuitry needed to -- evaluate case guards and trigger consequence expressions.
Subp_Id @@ -11263,6 +11267,26 @@ end if; end Check_Access_Invariants; + -- + -- Contains_Enabled_Pragmas -- + -- + + function Contains_Enabled_Pragmas (L : List_Id) return Boolean is + Prag : Node_Id; + + begin + Prag := First (L); + while Present (Prag) loop +if Nkind (Prag) = N_Pragma and then Is_Ignored (Prag) then + return False; +end if; + +Next (Prag); + end loop; + + return True; + end Contains_Enabled_Pragmas; + --- -- Expand_Contract_Cases -- --- @@ -12252,8 +12276,11 @@ -- If we had any postconditions and expansion is enabled, or if the -- subprogram has invariants, then build the _Postconditions procedure. - if (Present (Plist) or else Invariants_Or_Predicates_Present) -and then Expander_Active + if Expander_Active +and then + (Invariants_Or_Predicates_Present + or else + (Present (Plist) and then Contains_Enabled_Pragmas (Plist))) then if No (Plist) then Plist := Empty_List;
Re: patch to fix constant math -5th patch, rtl
Richard Biener richard.guent...@gmail.com writes: I suppose the above should use immed_double_int_const (v, mode), too, In practice it doesn't matter, because... which oddly only ever truncates to mode for modes = HOST_BITS_PER_WIDE_INT via gen_int_mode. ...right. That's because there's not really any proper support for non-power-of-2 modes. Partial modes like PDI are specified by things like: PARTIAL_INT_MODE (DI); which is glaringly absent of any bit width. So if the constant is big enough to need 2 HWIs, it in practice must be exactly 2 HWIs wide. One of the advantages of wide_int is that it allows us to relax this restriction: we can have both (a) mode widths greater than HOST_BITS_PER_WIDE_INT*2 and (b) mode widths that are greater than HOST_BITS_PER_WIDE_INT while not being a multiple of it. In other words, one of the reasons wide_int can't be exactly 1:1 in practice is because it is clearing out these mistakes (GEN_INT rather than gen_int_mode) and missing features (non-power-of-2 widths). Richard
[Ada] New gnatls switch -aPdir
A new switch -aPdir is added to gnatls. When gnatls is called with one or several switches -aPdir and also switch -v, the directories specified in the -aP switches are displayed immediately after the current directory in the Project Search Path. A new warning is issued by gnatls for switches that are not recognized by gnatls. The GNAT driver when called as gnat ls/list -aPdir ... will also use the switch -aPdir in its invocation of gnatls. Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Vincent Celier cel...@adacore.com * gnat_ugn.texi: Document new gnatls switch -aPdir. * gnatcmd.adb: Pass switch -aPdir to gnatls. * gnatls.adb (Scan_Ls_Arg): Process new switch -aPdir. Issue a warning for unknown switches. (Usage): Add line for new switch -aPdir. Index: gnatcmd.adb === --- gnatcmd.adb (revision 198226) +++ gnatcmd.adb (working copy) @@ -1766,8 +1766,17 @@ (Root_Environment.Project_Path, Argv (Argv'First + 3 .. Argv'Last)); - Remove_Switch (Arg_Num); + -- Pass -aPdir to gnatls + if The_Command = List then +Arg_Num := Arg_Num + 1; + + -- but not to other tools + + else +Remove_Switch (Arg_Num); + end if; + -- -eL Follow links for files elsif Argv.all = -eL then Index: gnat_ugn.texi === --- gnat_ugn.texi (revision 198236) +++ gnat_ugn.texi (working copy) @@ -16393,6 +16393,10 @@ Source path manipulation. Same meaning as the equivalent @command{gnatmake} flags (@pxref{Switches for gnatmake}). +@item ^-aP^/ADD_PROJECT_SEARCH_DIR=^@var{dir} +@cindex @option{^-aP^/ADD_PROJECT_SEARCH_DIR=^} (@code{gnatls}) +Add @var{dir} at the beginning of the project search dir. + @item --RTS=@var{rts-path} @cindex @option{--RTS} (@code{gnatls}) Specifies the default location of the runtime library. Same meaning as the Index: gnatls.adb === --- gnatls.adb (revision 198221) +++ gnatls.adb (working copy) @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 1992-2011, Free Software Foundation, Inc. -- +-- Copyright (C) 1992-2013, Free Software Foundation, Inc.
-- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -1253,6 +1253,8 @@ FD : File_Descriptor; Len : Integer; + OK : Boolean; + begin pragma Assert (Argv'First = 1); @@ -1260,6 +1262,7 @@ return; end if; + OK := True; if Argv (1) = '-' then if Argv'Length = 1 then Fail (switch character cannot be followed by a blank); @@ -1297,6 +1300,11 @@ elsif Argv'Length = 3 and then Argv (2 .. 3) = aL then Add_Lib_Dir (Argv (4 .. Argv'Last)); + -- Processing for -aPdir + + elsif Argv'Length 3 and then Argv (1 .. 3) = -aP then +Add_Directories (Prj_Path, Argv (4 .. Argv'Last)); + -- Processing for -nostdinc elsif Argv (2 .. Argv'Last) = nostdinc then @@ -1316,7 +1324,7 @@ when 'l' = License := True; when 'V' = Very_Verbose_Mode := True; - when others = null; + when others = OK := False; end case; -- Processing for -files=file @@ -1396,6 +1404,9 @@ Opt.No_Stdinc := True; Opt.RTS_Switch := True; end if; + + else +OK := False; end if; -- If not a switch, it must be a file name @@ -1403,6 +1414,13 @@ else Add_File (Argv); end if; + + if not OK then + Write_Str (warning: unknown switch ); + Write_Str (Argv); + Write_Line (); + end if; + end Scan_Ls_Arg; --- @@ -1484,6 +1502,11 @@ Write_Str ( -aOdir specify object files search path); Write_Eol; + -- Line for -aP switch + + Write_Str ( -aPdir specify project search path); + Write_Eol; + -- Line for -I switch Write_Str ( -Idir like -aIdir -aOdir);
[Ada] Do not apply float conversion checks if expansion not enabled
We do not need checks if we are not generating code, and in SPARK mode, we specifically don't want the frontend to expand these checks, which are dealt with directly in the formal verification backend. This is similar to what is already done for other checks. Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Yannick Moy m...@adacore.com * checks.adb (Apply_Float_Conversion_Check): Do not apply checks if full expansion is not enabled. Index: checks.adb === --- checks.adb (revision 198226) +++ checks.adb (working copy) @@ -1907,6 +1907,15 @@ Reason : RT_Exception_Code; begin + -- We do not need checks if we are not generating code (i.e. the full + -- expander is not active). In SPARK mode, we specifically don't want + -- the frontend to expand these checks, which are dealt with directly + -- in the formal verification backend. + + if not Full_Expander_Active then + return; + end if; + if not Compile_Time_Known_Value (LB) or not Compile_Time_Known_Value (HB) then
[AArch64] Fix vld1q_* asm constraints in arm_neon.h
Hi, The vld1q_* patterns in arm_neon.h did not correctly describe their register/memory constraints. This could lead to incorrect code generation where they were used. This patch fixes the bug by giving the patterns the correct register constraints. Regression tested on aarch64-none-elf without regressions. OK? Thanks, James Greenhalgh --- gcc/ 2013-04-24 James Greenhalgh james.greenha...@arm.com * config/aarch64/arm_neon.h (vld1q_lane*): Fix constraints. (vld1q_dup_sufp8, 16, 32, 64): Likewise. (vld1q_sufp8, 16, 32, 64): Likewise. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 5e25c77..58343a7 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8390,10 +8390,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t * a) { float32x2_t result; - __asm__ (ld1r {%0.2s},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.2s}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8401,10 +8401,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) vld1_dup_f64 (const float64_t * a) { float64x1_t result; - __asm__ (ld1 {%0.1d},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.1d}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8412,10 +8412,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vld1_dup_p8 (const poly8_t * a) { poly8x8_t result; - __asm__ (ld1r {%0.8b},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.8b}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8423,10 +8423,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vld1_dup_p16 (const poly16_t * a) { poly16x4_t result; - __asm__ (ld1r {%0.4h},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.4h}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8434,10 
+8434,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vld1_dup_s8 (const int8_t * a) { int8x8_t result; - __asm__ (ld1r {%0.8b},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.8b}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8445,10 +8445,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vld1_dup_s16 (const int16_t * a) { int16x4_t result; - __asm__ (ld1r {%0.4h},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.4h}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8456,10 +8456,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vld1_dup_s32 (const int32_t * a) { int32x2_t result; - __asm__ (ld1r {%0.2s},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.2s}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8467,10 +8467,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_dup_s64 (const int64_t * a) { int64x1_t result; - __asm__ (ld1 {%0.1d},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.1d}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8478,10 +8478,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vld1_dup_u8 (const uint8_t * a) { uint8x8_t result; - __asm__ (ld1r {%0.8b},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.8b}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8489,10 +8489,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vld1_dup_u16 (const uint16_t * a) { uint16x4_t result; - __asm__ (ld1r {%0.4h},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.4h}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8500,10 +8500,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vld1_dup_u32 (const 
uint32_t * a) { uint32x2_t result; - __asm__ (ld1r {%0.2s},[%1] - : =w(result) - : r(a) - : memory); + __asm__ (ld1r {%0.2s}, %1 + : =w(result) + : Utv(*a) + : /* No clobbers */); return result; } @@ -8511,10 +8511,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vld1_dup_u64 (const uint64_t * a) { uint64x1_t result; - __asm__ (ld1 {%0.1d},[%1] -
Re: patch to fix constant math -5th patch, rtl
On 04/24/2013 09:36 AM, Richard Biener wrote: On Wed, Apr 24, 2013 at 2:44 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Biener richard.guent...@gmail.com writes: Can we in such cases please to a preparatory patch and change the CONST_INT/CONST_DOUBLE paths to do an explicit [sz]ext to mode precision first? I'm not sure what you mean here. CONST_INT HWIs are already sign-extended from mode precision to HWI precision. The 8-bit value 0xb1000 must be represented as (const_int -128); nothing else is allowed. E.g. (const_int 128) is not a valid QImode value on BITS_PER_UNIT==8 targets. Yes, that's what I understand. But consider you get a CONST_INT that is _not_ a valid QImode value. Current code simply trusts that it is, given the context from ... And the fact that it we have to trust but cannot verify is a severe problem at the rtl level that is not going to go away.what i have been strongly objecting to is your idea that just because we cannot verify it, we can thus go change it in some completely different way (i.e. the infinite precision nonsense that you keep hitting us with) and it will all be ok. I have three problems with this. 1) Even if we could do this, it gives us answers that are not what the programmer expects!! Understand this!!! Programmers expect the code to behave the same way if they optimize it or not. If you do infinite precision arithmetic you get different answers than the machine may give you. While the C and C++ standards allow this, it is NOT desirable. While there are some optimizations that must make visible changes to be effective, this is certainly not the case with infinite precision mathMaking the change to infinite precision math only because you think is pretty is NOT best practices and will only give GCC a bad reputation in the community. Each programming language defines what it means to do constant arithmetic and by and large, our front ends do this the way they say. 
But once you go beyond that, you are squarely in the realm where an optimizer is expected to try to make the program run fast without changing the results. Infinite precision math in the optimizations is visible in that A * B / C may get different answers between an infinite precision evaluation and one that is finite precision as specified by the types. And all of this without any possible upside to the programmer. Why would we want to drive people to use llvm? This is my primary objection.If you ever gave any reason for infinite precision aside from that you consider it pretty, then i would consider it.BUT THIS IS NOT WHAT PROGRAMMERS WANT 2) The rtl level of GCC does not follow best practices by today's standards.It is quite fragile. At this point, the best that can be said is that it generally seems to work. What you are asking is for us to make the assumption that the code is in fact in better shape than it is.I understand that in your mind, you are objecting to letting the back ends hold back something that you believe the middle ends should do, but the truth is that this is a bad idea for the middle ends. 3) If i am on a 32 bit machine and i say GEN_INT (0x), i get a 32 bit word with 32 1s in it. There is no other information. In particular there is no information that tells me was that a -1 or was that the largest positive integer. We do not have GEN_INTS and a GEN_INTU, we just have GEN_INT. Your desire is that we can take those 32 bits and apply the lt_p function, not the ltu_p or lts_p function, but an lu_p function and use that to compare those 32 bits to something. At the rtl level there is simply not enough information there to sign extend this value. This will never work without a major rewrite of the back ends.
[Ada] Entities for subprogram body should not have a contract node attached
Contract nodes are meant to carry information for subprogram spec entities, not subprogram body entities (for bodies that are completions of specs). So we remove the contract node when a spec entity is changed to a body entity. Depending on how refined contracts on bodies are handled, this may need to be updated in the future. Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Yannick Moy m...@adacore.com * sem_ch6.adb (Analyze_Generic_Subprogram_Body, Analyze_Subprogram_Body_Helper): Reset contract node to Empty before setting entity to E_Subprogram_Body. * sem_ch8.adb (Analyze_Subprogram_Renaming): Reset contract node to Empty before setting entity to E_Subprogram_Body. Index: sem_ch6.adb === --- sem_ch6.adb (revision 198237) +++ sem_ch6.adb (working copy) @@ -1107,6 +1107,7 @@ -- Visible generic entity is callable within its own body Set_Ekind (Gen_Id, Ekind (Body_Id)); + Set_Contract (Body_Id, Empty); Set_Ekind (Body_Id, E_Subprogram_Body); Set_Convention (Body_Id, Convention (Gen_Id)); Set_Is_Obsolescent (Body_Id, Is_Obsolescent (Gen_Id)); @@ -2902,6 +2903,7 @@ end if; Set_Corresponding_Body (Unit_Declaration_Node (Spec_Id), Body_Id); + Set_Contract (Body_Id, Empty); Set_Ekind (Body_Id, E_Subprogram_Body); Set_Scope (Body_Id, Scope (Spec_Id)); Set_Is_Obsolescent (Body_Id, Is_Obsolescent (Spec_Id)); Index: sem_ch8.adb === --- sem_ch8.adb (revision 198221) +++ sem_ch8.adb (working copy) @@ -2435,6 +2435,7 @@ -- constructed later at the freeze point, so indicate that the -- completion has not been seen yet. + Set_Contract (New_S, Empty); Set_Ekind (New_S, E_Subprogram_Body); New_S := Rename_Spec; Set_Has_Completion (Rename_Spec, False);
[PATCH][RFC] Preserve loops from CFG build on
The following patch makes us create and preserves loops from the point we build the CFG. As-is the patch passes bootstrap on x86_64-unknown-linux-gnu for all languages including Ada and has no ICEs running the testsuite. There are a few testcases that fail which I still have to investigate. I tried to properly outline the loop tree on SESE region outlining (used by OMP and auto-par), I have a followup patch for that but it's complicated by the fact that OMP doesn't preserve loops enough to do things fancy and autopar wanting to verify loops after each loop outlined (and I don't want to remove that or forcefully fixup loops all the time). So I need more time on that bit. Anyway, good enough for comments at least (I'm currently checking if the tree-sra bits are still necessary). One change with the patch is that loop fixup can be delayed until the next loop_optimzer_init call which means that passes looking at the loop tree but not calling loop_optimizer_init need to watch out for all sorts of oddities in the loop tree. Generally you are expected to call loop_optimizer_init in such case. Any comments? In the final version of the patch (or maybe incrementally) I'd like to drop PROP_loops, not compute loops at CFG build but at first loop_optimizer_init and make loop_optimizer_finalize not free current_loops (but only drop to the most basic set of loop tree state). I'd also eventually drop most explicit calls to fixup_loop_structure (given lazy handling in loop_optimizer_init). Richard. 2013-03-19 Richard Biener rguent...@suse.de * tree-cfg.c (execute_build_cfg): Build the loop tree. (pass_build_cfg): Provide PROP_loops. (move_sese_region_to_fn): Remove loops that are outlined into fn for now. * tree-inline.c: Include cfgloop.h. (initialize_cfun): Do not drop PROP_loops. (copy_loops): New function. (copy_cfg_body): Copy loop structure. (tree_function_versioning): Initialize destination loop tree. ??? 
* tree-sra.c (modify_function): Always cleanup the CFG (and fix loops). * tree-ssa-loop.c (pass_tree_loop_init): Do not provide PROP_loops. * loop-init.c (loop_optimizer_init): Fixup loops if required. * tree-optimize.c (execute_fixup_cfg): If we need to cleanup the CFG make sure we fixup loops as well. * tree-ssa-tail-merge.c: Include cfgloop.h. (find_same_succ_bb): Avoid merging loop latches with anything. * lto-streamer-out.c (output_struct_function_base): Drop PROP_loops for now. * tree-ssa-phiopt.c: Include tree-scalar-evolution.h. (tree_ssa_cs_elim): Initialize the loop optimizer and SCEV. * ipa-split.c: Include cfgloop.h. (split_function): Add the new return block to the loop tree root. * tree-cfgcleanup.c (remove_forwarder_block_with_phi): Return whether we have removed the forwarder block. (merge_phi_nodes): If we removed a forwarder fixup loops. * cfgloop.h (place_new_loop): Declare. * cfgloopmanip.c (place_new_loop): Export. * Makefile.in (asan.o): Add $(CFGLOOP_H) dependency. (tree-switch-conversion.o): Likewise. (tree-complex.o): Likewise. (tree-inline.o): Likewise. (tree-ssa-tailmerge.o): Likewise. (ipa-split.o): Likewise. (tree-ssa-phiopt.o): Add $(SCEV_H) dependency. * tree-switch-conversion.c: Include cfgloop.h (process_switch): If we emit a bit-test cascade, schedule loops for fixup. * tree-complex.c: Include cfgloop.h. (expand_complex_div_wide): Properly add new basic-blocks to loops. * asan.c: Include cfgloop.h. (create_cond_insert_point): Properly add new basic-blocks to loops, schedule loop fixup. * omp-low.c (expand_parallel_call): Properly add new basic-blocks to loops. (expand_omp_for_generic): Likewise. (expand_omp_sections): Likewise. (expand_omp_atomic_pipeline): Schedule loops for fixup. 
Index: trunk/gcc/tree-cfg.c === *** trunk.orig/gcc/tree-cfg.c 2013-04-24 10:08:37.0 +0200 --- trunk/gcc/tree-cfg.c2013-04-24 16:05:54.972456799 +0200 *** execute_build_cfg (void) *** 220,225 --- 220,227 fprintf (dump_file, Scope blocks:\n); dump_scope_blocks (dump_file, dump_flags); } + cleanup_tree_cfg (); + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); return 0; } *** struct gimple_opt_pass pass_build_cfg = *** 236,245 0, /* static_pass_number */ TV_TREE_CFG,/* tv_id */ PROP_gimple_leh,/* properties_required */ ! PROP_cfg, /* properties_provided */ 0, /* properties_destroyed */ 0,
Re: patch to fix constant math -5th patch, rtl
On Wed, Apr 24, 2013 at 4:29 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Biener richard.guent...@gmail.com writes: I suppose the above should use immed_double_int_const (v, mode), too, In practice it doesn't matter, because... which oddly only ever truncates to mode for modes = HOST_BITS_PER_WIDE_INT via gen_int_mode. ...right. That's because there's not really any proper support for non-power-of-2 modes. Partial modes like PDI are specified by things like: PARTIAL_INT_MODE (DI); which is glaringly absent of any bit width. So if the constant is big enough to need 2 HWIs, it in practice must be exactly 2 HWIs wide. Ah, of course. One of the advantages of wide_int is that it allows us to relax this restriction: we can have both (a) mode widths greater than HOST_BITS_PER_WIDE_INT*2 and (b) mode widths that are greater than HOST_BITS_PER_WIDE_INT while not being a multiple of it. In other words, one of the reasons wide_int can't be exactly 1:1 in practice is because it is clearing out these mistakes (GEN_INT rather than gen_int_mode) and missing features (non-power-of-2 widths). Note that the argument should be about CONST_WIDE_INT here, not wide-int. Indeed CONST_WIDE_INT has the desired feature and can be properly truncated/extended according to mode at the time we build it via immed_wide_int_cst (w, mode). I don't see the requirement that wide-int itself is automagically providing that truncation/extension (though it is a possibility, one that does not match existing behavior of HWI for CONST_INT or double-int for CONST_DOUBLE). Richard. Richard
[C++ Patch] Add __GXX_EXPERIMENTAL_CXX1Y__
Hi, I believe this is all we need in order to get the ball rolling in the library for -std=c++1y. If we think it's conceptually clearer (no difference in practice, because cxx11 == cxx0x), for the legacy C++0x macro we could also do: if (cxx_dialect = cxx0x cxx_dialect cxx1y) cpp_define (pfile, __GXX_EXPERIMENTAL_CXX0X__); I'm finishing testing the below on x86_64-linux. Thanks, Paolo. /// /c-family 2013-04-24 Paolo Carlini paolo.carl...@oracle.com * c-cppbuiltin.c (c_cpp_builtins): Define __GXX_EXPERIMENTAL_CXX1Y__. /doc 2013-04-24 Paolo Carlini paolo.carl...@oracle.com * cpp.texi: Document __GXX_EXPERIMENTAL_CXX1Y__. /testsuite 2013-04-24 Paolo Carlini paolo.carl...@oracle.com * g++.dg/cpp1y/cxx1y_macro.C: New. Index: c-family/c-cppbuiltin.c === --- c-family/c-cppbuiltin.c (revision 198231) +++ c-family/c-cppbuiltin.c (working copy) @@ -713,8 +713,10 @@ c_cpp_builtins (cpp_reader *pfile) cpp_define (pfile, __DEPRECATED); if (flag_rtti) cpp_define (pfile, __GXX_RTTI); - if (cxx_dialect = cxx0x) + if (cxx_dialect == cxx0x) cpp_define (pfile, __GXX_EXPERIMENTAL_CXX0X__); + if (cxx_dialect = cxx1y) +cpp_define (pfile, __GXX_EXPERIMENTAL_CXX1Y__); } /* Note that we define this for C as well, so that we know if __attribute__((cleanup)) will interface with EH. */ Index: doc/cpp.texi === --- doc/cpp.texi(revision 198231) +++ doc/cpp.texi(working copy) @@ -2319,6 +2319,13 @@ features likely to be included in C++0x are availa features are experimental, and may change or be removed in future versions of GCC. +@item __GXX_EXPERIMENTAL_CXX1Y__ +This macro is defined when compiling a C++ source file with the option +@option{-std=c++1y} or @option{-std=gnu++1y}. It indicates that some +features likely to be included in C++1y are available. Note that these +features are experimental, and may change or be removed in future +versions of GCC. + @item __GXX_WEAK__ This macro is defined when compiling a C++ source file. 
It has the value 1 if the compiler will use weak symbols, COMDAT sections, or Index: testsuite/g++.dg/cpp1y/cxx1y_macro.C === --- testsuite/g++.dg/cpp1y/cxx1y_macro.C(revision 0) +++ testsuite/g++.dg/cpp1y/cxx1y_macro.C(working copy) @@ -0,0 +1,5 @@ +// { dg-options -std=c++1y } + +#ifndef __GXX_EXPERIMENTAL_CXX1Y__ +#error +#endif
[Ada] Aspects on expression function without prior declaration
This patch corrects the parser to detect an illegal placement of the aspect specification list. -- Source -- -- semantics.adb: procedure Semantics is function Error (Formal : Integer) return Boolean with Pre = Formal 1234 is (Formal = 5678); begin null; end Semantics; -- Compilation and output -- $ gcc -c -gnat12 semantics.adb semantics.adb:3:11: aspect specifications must come after parenthesized expression Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Hristian Kirtchev kirtc...@adacore.com * par-ch6.adb (P_Subprogram): Detect an illegal placement of the aspect specification list in the context of expression functions. Index: par-ch6.adb === --- par-ch6.adb (revision 198221) +++ par-ch6.adb (working copy) @@ -838,6 +838,22 @@ (\unit must be compiled with -gnat2012 switch!); end if; + -- Catch an illegal placement of the aspect specification + -- list: + + --function_specification + -- [aspect_specification] is (expression); + + -- This case is correctly processed by the parser because + -- the expression function first appears as a subprogram + -- declaration to the parser. + + if Is_Non_Empty_List (Aspects) then + Error_Msg + (aspect specifications must come after parenthesized + expression, Sloc (First (Aspects))); + end if; + -- Parse out expression and build expression function Body_Node :=
[Ada] Anonymous access components that denote tasks
In Ada 2012 it is possible to complete an incomplete type with a record one of whose components is an anonymous access to task initialized with an allocator. The analysis of the allocator in the init_proc for the type creates an anonymous access that requires a master id. This must be obtained from the added formal of the init_proc, using the master renaming machinery. Compiling and executing main.adb must yield: Yeah --- with P; use P; procedure Main is THing : T; begin Thing.R.Here; end; --- pragma Ada_2012; package P is type T; task type RT (Self : not null access T) is entry Here; end; type T is tagged limited record R : access RT := new RT (T'Unrestricted_Access); Name : String (1..4) := Yeah; end record; end P; --- with Text_IO; use Text_IO; package body P is task body RT is begin accept Here do Put_Line (Self.Name); end; end; end P; Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Ed Schonberg schonb...@adacore.com * exp_ch4.adb (Expand_N_Allocator): If the designated object has tasks, and the pointer type is an itype that has no master id, create a master renaming in the current context, which can only be an init_proc. Index: exp_ch4.adb === --- exp_ch4.adb (revision 198241) +++ exp_ch4.adb (working copy) @@ -4577,9 +4577,19 @@ -- access type did not get expanded. Salvage it now. if not Restriction_Active (No_Task_Hierarchy) then -pragma Assert (Present (Parent (Base_Type (PtrT; -Expand_N_Full_Type_Declaration - (Parent (Base_Type (PtrT))); +if Present (Parent (Base_Type (PtrT))) then + Expand_N_Full_Type_Declaration + (Parent (Base_Type (PtrT))); + +else + -- If the type of the allocator is an itype, + -- the master must exist in the context. This + -- is the case when the allocator initializes + -- an access component in an init-proc. + + pragma Assert (Is_Itype (PtrT)); + Build_Master_Renaming (PtrT, N); +end if; end if; end if;
Re: Fix C++ testcases for size_t vs intptr_t
On Wed, Apr 24, 2013 at 4:59 AM, Bernd Schmidt ber...@codesourcery.com wrote: This fixes C++ testcases that used size_t when casting a pointer to integer so that they use intptr_t instead. There's also a testcase using pointer subtraction where ptrdiff_t is the correct choice, and a fix to the ptrmem.C testcase to use sizeof on a function pointer rather than a data pointer. A similar patch for C testcases was already applied last year. Bootstrapped and tested on x86_64-linux. Ok? Bernd Yes, OK. Thanks! -- Gaby
Re: patch to fix constant math -5th patch, rtl
On 04/24/2013 10:42 AM, Richard Biener wrote: On Wed, Apr 24, 2013 at 4:29 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Biener richard.guent...@gmail.com writes: I suppose the above should use immed_double_int_const (v, mode), too, In practice it doesn't matter, because... which oddly only ever truncates to mode for modes = HOST_BITS_PER_WIDE_INT via gen_int_mode. ...right. That's because there's not really any proper support for non-power-of-2 modes. Partial modes like PDI are specified by things like: PARTIAL_INT_MODE (DI); which is glaringly absent of any bit width. So if the constant is big enough to need 2 HWIs, it in practice must be exactly 2 HWIs wide. Ah, of course. One of the advantages of wide_int is that it allows us to relax this restriction: we can have both (a) mode widths greater than HOST_BITS_PER_WIDE_INT*2 and (b) mode widths that are greater than HOST_BITS_PER_WIDE_INT while not being a multiple of it. In other words, one of the reasons wide_int can't be exactly 1:1 in practice is because it is clearing out these mistakes (GEN_INT rather than gen_int_mode) and missing features (non-power-of-2 widths). Note that the argument should be about CONST_WIDE_INT here, not wide-int. Indeed CONST_WIDE_INT has the desired feature and can be properly truncated/extended according to mode at the time we build it via immed_wide_int_cst (w, mode). I don't see the requirement that wide-int itself is automagically providing that truncation/extension (though it is a possibility, one that does not match existing behavior of HWI for CONST_INT or double-int for CONST_DOUBLE). Richard. yes but you still have the problem with partial ints having no length.Our plan was to be very careful and make sure that at no point were we doing anything that makes it harder to put modes in const_ints, but that is different from going thru everything and doing it. 
Partially because of this discussion and some issues that you brought up with patch 4, i am removing the trick of being able to say 'wi + rtl' because there is no mode for the rtl. i am leaving the 'wi + tree' because there is enough info in the tree cst to make this work. but you are going to have to say something like wi::add (mode, rtl) see, i am willing to do things that work better in the tree world than in the rtl world. kenny Richard
Re: Compute precise counter histogram at LTO
On Wed, Apr 24, 2013 at 6:37 AM, Teresa Johnson tejohn...@google.com wrote: On Mon, Apr 22, 2013 at 11:16 AM, Jan Hubicka hubi...@ucw.cz wrote: Hi, sorry for getting back to this late. That's a larger error than I had expected from the merging, although as you note it is an approximation so there is going to be some amount of error. If the error is that large then maybe there is a better merging algorithm, since in the non-LTO case we won't be able to recompute it exactly. For cc1, what was your test case - profiledbootstrap or something simpler? I can try to reproduce this and see if it is another bug or just due to the approximation. I've been using Rong's tool to compute the exactly merged histogram from the gcov-merged histogram for perlbmk. I tried a couple test cases - with the 5 train inputs, and with the 3 ref inputs. In both cases I am seeing up to 200% or so difference in some of the working set min counter values, although the error is not as high for the higher working set percentages. But large enough to cause a potential performance issue nonetheless. One thing that confuse me is why the error tends to be in positive direction. Since we are minimizing the counter during merging, I would expect us to more or less consistently underestimate the counters. Do you have any intuition here? Hi Honza, Yes, I think I know what is going on. What to do about it is a different story. =) From comparing the histograms merged by libgcov to the exactly merged histograms produced by Rong's tool at larger histogram sizes, I think the main issue is that the hotspots are slightly different between different runs, and this causes inaccuracies in the merged histogram unless you have global counter information. I tried increasing the histogram up to a much larger size - 16128. This is 256 linear subranges per bit instead of the default 4. 
This increased the accuracy at the high end of the profile range, but I was still left with a fair amount of inaccuracy in the working set (except the 99.9% bucket, which is very close now). I confirmed that some of the hottest counters between different training inputs (of perlbmk) are from different functions. The hottest 2 counters are the same between all 3 runs, but after that there are some differences. When the hotspots are different, those large counter values actually correspond to smaller counter values in the other run. Therefore, at the high end of the counter range, the gcov-merged counter values are artificially high, because we are summing large counter values that shouldn't be correlated. Similarly, some of the smaller counter values are being summed together that shouldn't be, producing artificially low merged counters. For example, in the libgcov-merged profile, the number of 0 valued counters will be the min of the number of 0-valued counters in the individual runs. But in the exactly-merged profile, there are fewer 0-valued counters, because some of the 0 counters from individual runs were non-zero in other runs. I confirmed by comparing graphs of the counter values produced by each merge that there are more very high and very low counter values in the libgcov-merged profile. Graphing the counter values accumulated from highest to lowest (similar to what the working set computation does), shows that the accumulated sum grows faster in the libgcov-merged case as a result, and this is causing the various working set cutoff values to be hit earlier, resulting in higher min counter values. I haven't come up with any great ideas on how to handle this issue though. Possibly artificially reduce the min counter value for the working sets when there were more than one run? Also if you have setup with your tool, it may be nice to double check that the histograms produced by the LTO pass actually match the histogram produced by the Ron's external tool. 
I am not sure if his tool takes into account the estimated execution times of basic blocks. If not it may be interesting experiment by itself, since we will get how well counting counts alone estimate the times. (I would expect it to be rather good, but it is always better to sanity check). Rong sent his tool for review for the gcc google 4_7 branch for now, but it is usable for trunk too. See: http://gcc.gnu.org/ml/gcc-patches/2012-11/msg02141.html Woops, wrong link. Correct one is: http://gcc.gnu.org/ml/gcc-patches/2013-04/msg00607.html Teresa It doesn't take any estimated times into account - it just merges the counters and recomputes the histogram based on all the counters. We could take an exactly merged profile produced by his tool and feed it into LTO with your patch and get the time estimate comparison your patch dumps out. But I would expect it to be the same since like LTO it has the same global set of counter input? Teresa It looks like the histogram min counters are always off in the larger direction
[Ada] Disabled invariants and preconditions and _Postconditions
This patch disables the generation of internal procedure _Postconditions when invariants and preconditions are disabled. -- Source -- -- main.adb procedure Main is X : Integer := 0; type R is new Integer with Predicate = X 0; package Pack is type T is tagged private; procedure P (Arg1 : in out T; Arg2 : in out R) with Post = X 0, Post'Class = X 0; private type T is tagged null record with Invariant = X 0; end Pack; package body Pack is procedure P (Arg1 : in out T; Arg2 : in out R) is begin null; end P; end Pack; use Pack; Y : T; Z : R := 2; begin P (Y, Z); end Main; - -- Compilation -- - $ gcc -c -gnat12 -gnatd.V -gnatDG main.adb $ grep postconditions main.adb.dg Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Hristian Kirtchev kirtc...@adacore.com * exp_ch6.adb (Expand_Actuals): Add a predicate check on an actual the related type has a predicate function. * sem_ch3.adb (Constant_Redeclaration): Ensure that the related type has an invariant procedure before building a call to it. * sem_ch6.adb (Append_Enabled_Item): New routine. (Check_Access_Invariants): Use routine Append_Enabled_Item to chain onto the list of postconditions. (Contains_Enabled_Pragmas): Removed. (Expand_Contract_Cases): Use routine Append_Enabled_Item to chain onto the list of postconditions. (Invariants_Or_Predicates_Present): Removed. (Process_PPCs): Partially reimplemented. Index: sem_ch3.adb === --- sem_ch3.adb (revision 198241) +++ sem_ch3.adb (working copy) @@ -10761,13 +10761,9 @@ -- A deferred constant is a visible entity. If type has invariants, -- verify that the initial value satisfies them. 
- if Expander_Active and then Has_Invariants (T) then -declare - Call : constant Node_Id := - Make_Invariant_Call (New_Occurrence_Of (Prev, Sloc (N))); -begin - Insert_After (N, Call); -end; + if Has_Invariants (T) and then Present (Invariant_Procedure (T)) then +Insert_After (N, + Make_Invariant_Call (New_Occurrence_Of (Prev, Sloc (N; end if; end if; end Constant_Redeclaration; Index: exp_ch6.adb === --- exp_ch6.adb (revision 198234) +++ exp_ch6.adb (working copy) @@ -1728,17 +1728,19 @@ -- procedure does not include a predicate call, so it has to be -- generated explicitly. -if (Has_Aspect (E_Actual, Aspect_Predicate) - or else -Has_Aspect (E_Actual, Aspect_Dynamic_Predicate) - or else -Has_Aspect (E_Actual, Aspect_Static_Predicate)) - and then not Is_Init_Proc (Subp) +if not Is_Init_Proc (Subp) + and then (Has_Aspect (E_Actual, Aspect_Predicate) + or else +Has_Aspect (E_Actual, Aspect_Dynamic_Predicate) + or else +Has_Aspect (E_Actual, Aspect_Static_Predicate)) + and then Present (Predicate_Function (E_Actual)) then - if (Is_Derived_Type (E_Actual) -and then Is_Overloadable (Subp) -and then Is_Inherited_Operation_For_Type (Subp, E_Actual)) - or else Is_Entity_Name (Actual) + if Is_Entity_Name (Actual) + or else + (Is_Derived_Type (E_Actual) + and then Is_Overloadable (Subp) + and then Is_Inherited_Operation_For_Type (Subp, E_Actual)) then Append_To (Post_Call, Make_Predicate_Check (E_Actual, Actual)); Index: sem_ch6.adb === --- sem_ch6.adb (revision 198238) +++ sem_ch6.adb (working copy) @@ -332,14 +332,14 @@ end; end if; - Prev := Current_Entity_In_Scope (Defining_Entity (Spec)); + Prev := Current_Entity_In_Scope (Defining_Entity (Spec)); -- If there are previous overloadable entities with the same name, -- check whether any of them is completed by the expression function. 
if Present (Prev) and then Is_Overloadable (Prev) then - Def_Id := Analyze_Subprogram_Specification (Spec); - Prev := Find_Corresponding_Spec (N); + Def_Id := Analyze_Subprogram_Specification (Spec); + Prev := Find_Corresponding_Spec (N); end if; Ret := Make_Simple_Return_Statement (LocX, Expression (N)); @@ -11198,18 +11198,17 @@ Plist
[Ada] Fall-back termination handlers does not apply to Self
This patch fixes a small missunderstanding in the implementation of fall-back termination handlers. Previously, a fall-back termination handler set by a given task would apply for itself. However, it has been now corrected because it applies only to dependent tasks (see ARM C.7.3 par. 9/2). The following test case must generate only a OK: expected handler message, corresponding to the termination of the Child task triggering the fall-back termination handler set by its creator (and not the one set by task Child). $ gnatmake -q -gnat05 terminate_hierarchy $ terminate_hierarchy OK: expected handler - with Ada.Task_Termination; use Ada.Task_Termination; with Tasking; use Tasking; procedure Terminate_Hierarchy is begin Set_Dependents_Fallback_Handler (Monitor.Parent_Handler'Access); Child.Start; end Terminate_Hierarchy; with Ada.Task_Identification; use Ada.Task_Identification; with Ada.Task_Termination;use Ada.Task_Termination; with Ada.Exceptions; use Ada.Exceptions; package Tasking is protected Monitor is procedure Parent_Handler (C : Cause_Of_Termination; Id : Task_Id; X : Exception_Occurrence := Null_Occurrence); procedure Child_Handler (C : Cause_Of_Termination; Id : Task_Id; X : Exception_Occurrence := Null_Occurrence); end Monitor; task Child is entry Start; end Child; end Tasking; with Ada.Text_IO; use Ada.Text_IO; package body Tasking is protected body Monitor is procedure Parent_Handler (C : Cause_Of_Termination; Id : Task_Id; X : Exception_Occurrence := Null_Occurrence) is begin Put_Line (OK: expected handler); end Parent_Handler; procedure Child_Handler (C : Cause_Of_Termination; Id : Task_Id; X : Exception_Occurrence := Null_Occurrence) is begin Put_Line (KO: unexpected handler); end Child_Handler; end Monitor; task body Child is begin Set_Dependents_Fallback_Handler (Monitor.Child_Handler'Access); accept Start; end Child; end Tasking; Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Jose Ruiz r...@adacore.com * s-tassta.adb, s-tarest.adb 
(Task_Wrapper): Start looking for fall-back termination handlers from the parents, because they apply only to dependent tasks. * s-solita.adb (Task_Termination_Handler_T): Do not look for fall-back termination handlers because the environment task has no parent, and if it defines one of these handlers it does not apply to itself because they apply only to dependent tasks. Index: s-tassta.adb === --- s-tassta.adb(revision 198221) +++ s-tassta.adb(working copy) @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 1992-2012, Free Software Foundation, Inc. -- +-- Copyright (C) 1992-2013, Free Software Foundation, Inc. -- -- -- -- GNARL is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -1075,7 +1075,7 @@ procedure Search_Fall_Back_Handler (ID : Task_Id); -- Procedure that searches recursively a fall-back handler through the -- master relationship. If the handler is found, its pointer is stored - -- in TH. + -- in TH. It stops when the handler is found or when the ID is null. -- -- Search_Fall_Back_Handler -- @@ -1083,21 +1083,22 @@ procedure Search_Fall_Back_Handler (ID : Task_Id) is begin + -- A null Task_Id indicates that we have reached the root of the + -- task hierarchy and no handler has been found. + + if ID = null then +return; + -- If there is a fall back handler, store its pointer for later -- execution. - if ID.Common.Fall_Back_Handler /= null then + elsif ID.Common.Fall_Back_Handler /= null then TH := ID.Common.Fall_Back_Handler; -- Otherwise look for a fall back handler in the parent - elsif ID.Common.Parent /= null then + else Search_Fall_Back_Handler (ID.Common.Parent); - - -- Otherwise, do nothing - - else -return; end if; end Search_Fall_Back_Handler; @@ -1331,9 +1332,12 @@ TH := Self_ID.Common.Specific_Handler; else -- Look for a fall-back handler following the master relationship - -- for the
Re: [C++ Patch] Add __GXX_EXPERIMENTAL_CXX1Y__
On Wed, Apr 24, 2013 at 9:43 AM, Paolo Carlini paolo.carl...@oracle.com wrote: Hi, I believe this is all we need in order to get the ball rolling in the library for -std=c++1y. If we think it's conceptually clearer (no difference in practice, because cxx11 == cxx0x), for the legacy C++0x macro we could also do: if (cxx_dialect >= cxx0x && cxx_dialect < cxx1y) cpp_define (pfile, "__GXX_EXPERIMENTAL_CXX0X__"); I'm finishing testing the below on x86_64-linux. Thanks, Paolo. /// I prefer the patch as you posted it. I would like us to actually move to deprecate __GXX_EXPERIMENTAL_CXX0X__ in GCC-4.9 and remove it in the release after that. I don't think our support for C++11 is still that experimental. we can't have experimental support for two C++ standards going on :-) Other implementations are thumping the chest about being C++11 feature complete for less than what we offer. Patch OK. -- Gaby
Re: patch to fix constant math -5th patch, rtl
Richard Biener richard.guent...@gmail.com writes: On Wed, Apr 24, 2013 at 4:29 PM, Richard Sandiford rdsandif...@googlemail.com wrote: In other words, one of the reasons wide_int can't be exactly 1:1 in practice is because it is clearing out these mistakes (GEN_INT rather than gen_int_mode) and missing features (non-power-of-2 widths). Note that the argument should be about CONST_WIDE_INT here, not wide-int. Indeed CONST_WIDE_INT has the desired feature and can be properly truncated/extended according to mode at the time we build it via immed_wide_int_cst (w, mode). I don't see the requirement that wide-int itself is automagically providing that truncation/extension (though it is a possibility, one that does not match existing behavior of HWI for CONST_INT or double-int for CONST_DOUBLE). I agree it doesn't match the existing behaviour of HWI for CONST_INT or double-int for CONST_DOUBLE, but I think that's very much a good thing. The model for HWIs at the moment is that you have to truncate results to the canonical form after every operation where it matters. As you proved in your earlier message about the plus_constant bug, that's easily forgotten. I don't think the rtl code is doing all CONST_INT arithmetic on full HWIs because it wants to: it's doing it because that's the way C/C++ arithmetic on primitive types works. In other words, the current CONST_INT code is trying to emulate N-bit arithmetic (for gcc runtime N) using a single primitive integer type. wide_int gives us N-bit arithmetic directly; no emulation is needed. If your point is that an arbitrary-precision wide_int could be used by other (non-rtl, and probably non-tree) clients, then I don't really see the need. We already have mpz_t for that. What we don't have, and what we IMO need, is something that performs N-bit arithmetic for runtime N. It seems better to have a single class that does that for us (wide_int), rather than scatter N-bit emulation throughout the codebase, which is what we do now. 
Richard
Re: patch to fix constant math -5th patch, rtl
On Wed, Apr 24, 2013 at 4:35 PM, Kenneth Zadeck zad...@naturalbridge.com wrote: On 04/24/2013 09:36 AM, Richard Biener wrote: On Wed, Apr 24, 2013 at 2:44 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Biener richard.guent...@gmail.com writes: Can we in such cases please to a preparatory patch and change the CONST_INT/CONST_DOUBLE paths to do an explicit [sz]ext to mode precision first? I'm not sure what you mean here. CONST_INT HWIs are already sign-extended from mode precision to HWI precision. The 8-bit value 0xb1000 must be represented as (const_int -128); nothing else is allowed. E.g. (const_int 128) is not a valid QImode value on BITS_PER_UNIT==8 targets. Yes, that's what I understand. But consider you get a CONST_INT that is _not_ a valid QImode value. Current code simply trusts that it is, given the context from ... And the fact that it we have to trust but cannot verify is a severe problem at the rtl level that is not going to go away.what i have been strongly objecting to is your idea that just because we cannot verify it, we can thus go change it in some completely different way (i.e. the infinite precision nonsense that you keep hitting us with) and it will all be ok. Appearantly it is all ok because that's exactly what we have today (and had for the last 25 years). CONST_INT encodes infinite precision signed values (with the complication that a QImode 0x80 isn't valid, thus all modes are signed as well it seems). CONST_DOUBLE encodes infinite precision signed values as well. Just the infinite is limited by the size of the encoding, one and two HOST_WIDE_INTs. The interpretation of those infinite precision constants is based on the context (the operation mode of the operation we apply to a CONST_INT or CONST_DOUBLE). Thus CONST_INT and CONST_DOUBLE do not have a mode or precision but VOIDmode so different mode 1 can be shared (which is probably the original reason of that design decision). I have three problems with this. 
1) Even if we could do this, it gives us answers that are not what the programmer expects!! Understand this!!! Programmers expect the code to behave the same way if they optimize it or not. If you do infinite precision arithmetic you get different answers than the machine may give you. While the C and C++ standards allow this, it is NOT desirable. While there are some optimizations that must make visible changes to be effective, this is certainly not the case with infinite precision mathMaking the change to infinite precision math only because you think is pretty is NOT best practices and will only give GCC a bad reputation in the community. Note that as I tried to explain above this isn't a change. _You_ are proposing a change here! Namely to associate a precision with a _constant_. What precision does a '1' have? What precision does a '12374' have? It doesn't have any. With this proposed change we will have the possibility to explicitely program mismatches like simplify_binary_operation (PLUS_EXPR, HImode, wide_int_rtx (SImode, 27), wide_int_rtx (QImode, 1)) even if _only_ the desired mode of the result matters! Because given the invariant that a wide-int is valid (it doesn't have bits outside of its precision) it's precision does no longer matter! Each programming language defines what it means to do constant arithmetic and by and large, our front ends do this the way they say. But once you go beyond that, you are squarely in the realm where an optimizer is expected to try to make the program run fast without changing the results. Infinite precision math in the optimizations is visible in that A * B / C may get different answers between an infinite precision evaluation and one that is finite precision as specified by the types. And all of this without any possible upside to the programmer. Why would we want to drive people to use llvm? 
This is my primary objection.If you ever gave any reason for infinite precision aside from that you consider it pretty, then i would consider it.BUT THIS IS NOT WHAT PROGRAMMERS WANT Programming languages or prettiness is not in any way a reason to do infinite precision math. All-caps or pretty punctuation does not change that. Infinite precision math is what we do now. What I ask for is to make separate changes separately. You want larger and host independent integer constants. Fine - do that. You want to change how we do arithmetic? Fine - do that. But please separate the two. (well, I'm likely still going to object to the latter) 2) The rtl level of GCC does not follow best practices by today's standards. It is quite fragile. It works quite well. At this point, the best that can be said is that it generally seems to work. What you are asking is for us to make the assumption that the code is in fact in better shape than it is.I understand
[PATCH, AArch64] Enable Redundant Extension Elimination by default at O2 or higher
This patch enables Redundant Extension Elimination pass for AArch64. Testing shows no regressions on linux and bare-metal. In terms of performance impact, it reduces code-size for some benchmarks and makes no difference on others. OK to commit to trunk? Cheers, Ian 2013-04-24 Ian Bolton ian.bol...@arm.com * common/config/aarch64/aarch64-common.c: Enable REE pass at O2 or higher by default. Index: gcc/common/config/aarch64/aarch64-common.c === --- gcc/common/config/aarch64/aarch64-common.c (revision 198231) +++ gcc/common/config/aarch64/aarch64-common.c (working copy) @@ -44,6 +44,8 @@ static const struct default_options aarc { /* Enable section anchors by default at -O1 or higher. */ { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, +/* Enable redundant extension instructions removal at -O2 and higher. */ +{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, { OPT_LEVELS_NONE, 0, NULL, 0 } };
Re: [AArch64] Fix vld1q_* asm constraints in arm_neon.h
On 24/04/13 15:34, James Greenhalgh wrote: Hi, The vld1q_* patterns in arm_neon.h did not correctly describe their register/memory constraints. This could lead to incorrect code generation where they were used. This patch fixes the bug by giving the patterns the correct register constraints. Regression tested on aarch64-none-elf without regressions. OK? Thanks, James Greenhalgh --- gcc/ 2013-04-24 James Greenhalgh james.greenha...@arm.com * config/aarch64/arm_neon.h (vld1q_lane*): Fix constraints. (vld1q_dup_sufp8, 16, 32, 64): Likewise. (vld1q_sufp8, 16, 32, 64): Likewise. OK /Marcus
Re: vtables patch 1/3: allow empty array initializations
If there are targets where this isn't true, it would be helpful to know. Given the size_t/uintptr_t testsuite patch I just submitted I'm thinking they don't exist, but I'm kind of wondering about m32c, so Cc'ing DJ. For m32c-elf with -mcpu=m32c, size_t is 16 bits but void* is 24 bits.
Re: [AArch64] Fix vld1q_* asm constraints in arm_neon.h
On 24/04/13 16:09, Marcus Shawcroft wrote: On 24/04/13 15:34, James Greenhalgh wrote: Hi, The vld1q_* patterns in arm_neon.h did not correctly describe their register/memory constraints. This could lead to incorrect code generation where they were used. This patch fixes the bug by giving the patterns the correct register constraints. Regression tested on aarch64-none-elf without regressions. OK? Thanks, James Greenhalgh --- gcc/ 2013-04-24 James Greenhalgh james.greenha...@arm.com * config/aarch64/arm_neon.h (vld1q_lane*): Fix constraints. (vld1q_dup_sufp8, 16, 32, 64): Likewise. (vld1q_sufp8, 16, 32, 64): Likewise. OK /Marcus and backport to 4.8 and arm/4.7 please. /Marcus
Re: [PATCH, AArch64] Enable Redundant Extension Elimination by default at O2 or higher
On 24/04/13 16:06, Ian Bolton wrote: This patch enables Redundant Extension Elimination pass for AArch64. Testing shows no regressions on linux and bare-metal. In terms of performance impact, it reduces code-size for some benchmarks and makes no difference on others. OK to commit to trunk? Cheers, Ian 2013-04-24 Ian Bolton ian.bol...@arm.com * common/config/aarch64/aarch64-common.c: Enable REE pass at O2 or higher by default. OK /Marcus
Re: patch to fix constant math -5th patch, rtl
On Wed, Apr 24, 2013 at 5:00 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Biener richard.guent...@gmail.com writes: On Wed, Apr 24, 2013 at 4:29 PM, Richard Sandiford rdsandif...@googlemail.com wrote: In other words, one of the reasons wide_int can't be exactly 1:1 in practice is because it is clearing out these mistakes (GEN_INT rather than gen_int_mode) and missing features (non-power-of-2 widths). Note that the argument should be about CONST_WIDE_INT here, not wide-int. Indeed CONST_WIDE_INT has the desired feature and can be properly truncated/extended according to mode at the time we build it via immed_wide_int_cst (w, mode). I don't see the requirement that wide-int itself is automagically providing that truncation/extension (though it is a possibility, one that does not match existing behavior of HWI for CONST_INT or double-int for CONST_DOUBLE). I agree it doesn't match the existing behaviour of HWI for CONST_INT or double-int for CONST_DOUBLE, but I think that's very much a good thing. The model for HWIs at the moment is that you have to truncate results to the canonical form after every operation where it matters. As you proved in your earlier message about the plus_constant bug, that's easily forgotten. I don't think the rtl code is doing all CONST_INT arithmetic on full HWIs because it wants to: it's doing it because that's the way C/C++ arithmetic on primitive types works. In other words, the current CONST_INT code is trying to emulate N-bit arithmetic (for gcc runtime N) using a single primitive integer type. wide_int gives us N-bit arithmetic directly; no emulation is needed. Ok, so what wide-int provides is integer values encoded in 'len' HWI words that fit in 'precision' or more bits (and often in less). wide-int also provides N-bit arithmetic operations. IMHO both are tied too closely together. A give constant doesn't really have a precision. 
Associating one with it to give a precision to an arithmetic operation looks wrong to me and is a source of mismatches. What RTL currently has looks better to me - operations have explicitly specified precisions. If your point is that an arbitrary-precision wide_int could be used by other (non-rtl, and probably non-tree) clients, then I don't really see the need. We already have mpz_t for that. What we don't have, and what we IMO need, is something that performs N-bit arithmetic for runtime N. It seems better to have a single class that does that for us (wide_int), rather than scatter N-bit emulation throughout the codebase, which is what we do now. mpz_t is not suitable here - it's way too expensive. double-int was the suitable bit for now, but given its host dependency and inability to handle larger ints (VRP ...) the ability to use wide-ints for this looks appealing. Richard. Richard
Re: [Patch] Emit error for negative _Alignas alignment values
On Wed, 3 Apr 2013, Senthil Kumar Selvaraj wrote: 2013-04-03Senthil Kumar Selvaraj senthil_kumar.selva...@atmel.com * c-common.c (check_user_alignment): Emit error for negative values * gcc.dg/c1x-align-3.c: Add test for negative power of 2 OK (but note there should be a . at the end of each ChangeLog entry). -- Joseph S. Myers jos...@codesourcery.com
Re: [C++/C Patch] Have -Wpointer-arith enable by -Wpedantic, as documented
On Fri, 5 Apr 2013, Paolo Carlini wrote: Hi, in the audit trail of c++/56815 Manuel noticed that, inconsistently with the documentation, a LangEnabledBy was missing for -Wpointer-arith vs -Wpedantic. Then I noticed that a clean up was possible in the actual pedwarn calls, which, in fact, also fixes a bug: we don't want to actually emit such warnings for -Wpedantic -Wno-pointer-arith (as would happen before and after the trivial tweak above) The C changes are OK. -- Joseph S. Myers jos...@codesourcery.com
Re: patch to fix constant math -5th patch, rtl
Richard Biener richard.guent...@gmail.com writes: On Wed, Apr 24, 2013 at 4:35 PM, Kenneth Zadeck zad...@naturalbridge.com wrote: On 04/24/2013 09:36 AM, Richard Biener wrote: On Wed, Apr 24, 2013 at 2:44 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Biener richard.guent...@gmail.com writes: Can we in such cases please to a preparatory patch and change the CONST_INT/CONST_DOUBLE paths to do an explicit [sz]ext to mode precision first? I'm not sure what you mean here. CONST_INT HWIs are already sign-extended from mode precision to HWI precision. The 8-bit value 0xb1000 must be represented as (const_int -128); nothing else is allowed. E.g. (const_int 128) is not a valid QImode value on BITS_PER_UNIT==8 targets. Yes, that's what I understand. But consider you get a CONST_INT that is _not_ a valid QImode value. Current code simply trusts that it is, given the context from ... And the fact that it we have to trust but cannot verify is a severe problem at the rtl level that is not going to go away.what i have been strongly objecting to is your idea that just because we cannot verify it, we can thus go change it in some completely different way (i.e. the infinite precision nonsense that you keep hitting us with) and it will all be ok. Appearantly it is all ok because that's exactly what we have today (and had for the last 25 years). CONST_INT encodes infinite precision signed values (with the complication that a QImode 0x80 isn't valid, thus all modes are signed as well it seems). I think this is the fundamental disagreement. Your last step doesn't follow. RTL integer modes are neither signed nor unsigned. They are just a collection of N bits. The fact that CONST_INTs represent smaller-than-HWI integers in sign-extended form is purely a represential detail. There are no semantics attached to it. We could just as easily have decided to extend with zeros or ones instead of sign bits. 
Although the decision was made before my time, I'm pretty sure the point of having a canonical representation (which happened to be sign extension) was to make sure that any given rtl constant has only a single representation. It would be too confusing if a QImode 0x80 could be represented as either (const_int 128) or (const_int -128) (would (const_int 384) then also be OK?). And that's the problem with using an infinite-precision wide_int. If you directly convert a CONST_INT representation of 0x80 into a wide_int, you will always get infinite-precision -128, thanks to the CONST_INT canonicalisation rule. But if you arrive at 0x80 though arithmetic, you might get infinite-precision 128 instead. These two values would not compare equal. CONST_DOUBLE encodes infinite precision signed values as well. Just the infinite is limited by the size of the encoding, one and two HOST_WIDE_INTs. It encodes an N-bit integer. It's just that (assuming non-power-of-2 modes) several N-bit integers (with varying N) can be encoded using the same CONST_DOUBLE representation. That might be what you meant, sorry, and so might seem pedantic, but I wasn't sure. Richard
Re: [PATCH][ARM][testsuite][2/2] Add support for vcvt_f16_f32 and vcvt_f32_f16 NEON intrinsics
On 12/04/13 15:19, Kyrylo Tkachov wrote: Hi all, This patch adds testsuite options to check for a neon-fp16 effective target and add appropriate options. These are needed to test the half-precision NEON intrinsics that we are adding with patch [1/2] of this set. Tested on qemu with the tests added in patch [1/2]. Ok for trunk? Thanks, Kyrill gcc/testsuite/ChangeLog 2013-04-12 Kyrylo Tkachov kyrylo.tkachov at arm.com * lib/target-supports.exp (check_effective_target_arm_neon_fp16_ok_nocache): New procedure. (check_effective_target_arm_neon_fp16_ok): Likewise. (add_options_for_arm_neon_fp16): Likewise. OK. R.
Re: [PATCH] gcc: arm: linux-eabi: fix handling of armv4 bx fixups when linking
On 19/04/13 22:40, Mike Frysinger wrote: The bpabi.h header already sets up defines to automatically use the --fix-v4bx flag with the assembler linker as needed, and creates a default assembly linker spec which uses those. Unfortunately, the linux-eabi.h header clobbers the LINK_SPEC define and doesn't include the v4bx define when setting up its own. So while the assembler spec is retained and works fine to generate the right relocs, building for armv4 targets doesn't invoke the linker correctly so all the relocs get processed as if we had an armv4t target. You can see this with -dumpspecs when configuring gcc for an armv4 target and using --with-arch=armv4: $ armv4l-unknown-linux-gnueabi-gcc -dumpspecs | grep -B 1 fix-v4bx *subtarget_extra_asm_spec: %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx} ... With this fix in place, we also get the link spec: $ armv4l-unknown-linux-gnueabi-gcc -dumpspecs | grep -B 1 fix-v4bx *link: ... %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx} ... And all my hello world tests / glibc builds automatically turn the bx insn into the 'mov pc, lr' insn and all is right in the world. Signed-off-by: Mike Frysinger vap...@gentoo.org 2013-04-19 Mike Frysinger vap...@gentoo.org * config/arm/linux-eabi.h (LINK_SPEC): Add TARGET_FIX_V4BX_SPEC. --- Note: This issue seems to exist since the code was first introduced. At least, I've tested gcc-4.5.x and gcc-4.8.x and they both fail, and the code looks the same in gcc-4.[467].x. That means it's not technically a regression, so I guess policy dictates that it can't be merged into older branches? gcc/config/arm/linux-eabi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h index 4a425c8..8b7ebb2 100644 --- a/gcc/config/arm/linux-eabi.h +++ b/gcc/config/arm/linux-eabi.h @@ -80,7 +80,7 @@ /* At this point, bpabi.h will have clobbered LINK_SPEC. 
We want to use the GNU/Linux version, not the generic BPABI version. */ #undef LINK_SPEC -#define LINK_SPEC BE8_LINK_SPEC \ +#define LINK_SPEC TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC \ LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ LINUX_TARGET_LINK_SPEC ANDROID_LINK_SPEC) Mike, Thanks for the patch. The concept is right, but I think we need a better approach to avoid problems like this cropping up again. What I'd like to see is BPABI_LINK_SPEC in bpabi.h split into EABI_LINK_SPEC (and containing TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC) and then all EABI-based targets using EABI_LINK_SPEC as part of their rule set. R.
patch for latest lra changes.
The following patch incorporates some LRA changes on lra and mike-lra branches. I am committing them to get a better testing of them for x86/x86-64 on trunk. The patch was successfully tested and bootstrapped on x86/x86-64. Committed as rev. 198252. 2013-04-24 Vladimir Makarov vmaka...@redhat.com * rtl.h (struct rtx_def): Add comment for field jump. (LRA_SUBREG_P): New macro. * recog.c (register_operand): Check LRA_SUBREG_P. * lra.c (lra): Add note at the end of RTL code. Align non-empty stack frame. * lra-spills.c (lra_spill): Align stack after spilling pseudos. (lra_final_code_change): Skip subreg change for operators. * lra-eliminations.c (eliminate_regs_in_insn): Make return earlier if there are no operand changes. * lra-constraints.c (curr_insn_set): New. (match_reload): Set LRA_SUBREG_P. (emit_spill_move): Ditto. (check_and_process_move): Use curr_insn_set. Process only single set insns. Don't initialize sec_mem_p and change_p. (simplify_operand_subreg): Use LRA_SUBREG_P. (reg_in_class_p): New function. (process_alt_operands): Use it. Use #if HAVE_ATTR_enabled instead of #ifdef. Add code to remove cycling. (process_address): Check EXTRA_CONSTRAINT_STR. Process even if non-null disp. Reload inner instead of disp when base and index are null. Try to put lo_sum into register. (EBB_PROBABILITY_CUTOFF): Redefine probability in percents. (check_and_process_move): Move code for move cost check to simple_move_p. Remove equiv_substitution. (simple_move_p): New function. (curr_insn_transform): Initialize sec_mem_p and change_p. Set up curr_insn_set. Call check_and_process_move only for single set insns. Use the new function. Move call of check_and_process_move after operand equiv substitution and address process. Index: rtl.h === --- rtl.h (revision 198251) +++ rtl.h (working copy) @@ -265,7 +265,8 @@ struct GTY((chain_next (RTX_NEXT (%h) 1 in a SET that is for a return. In a CODE_LABEL, part of the two-bit alternate entry field. 
1 in a CONCAT is VAL_EXPR_IS_COPIED in var-tracking.c. - 1 in a VALUE is SP_BASED_VALUE_P in cselib.c. */ + 1 in a VALUE is SP_BASED_VALUE_P in cselib.c. + 1 in a SUBREG generated by LRA for reload insns. */ unsigned int jump : 1; /* In a CODE_LABEL, part of the two-bit alternate entry field. 1 in a MEM if it cannot trap. @@ -1411,6 +1412,11 @@ do { \ ((RTL_FLAG_CHECK1(SUBREG_PROMOTED_UNSIGNED_P, (RTX), SUBREG)-volatil) \ ? -1 : (int) (RTX)-unchanging) +/* True if the subreg was generated by LRA for reload insns. Such + subregs are valid only during LRA. */ +#define LRA_SUBREG_P(RTX) \ + (RTL_FLAG_CHECK1(LRA_SUBREG_P, (RTX), SUBREG)-jump) + /* Access various components of an ASM_OPERANDS rtx. */ #define ASM_OPERANDS_TEMPLATE(RTX) XCSTR (RTX, 0, ASM_OPERANDS) Index: recog.c === --- recog.c (revision 198251) +++ recog.c (working copy) @@ -1065,7 +1065,11 @@ register_operand (rtx op, enum machine_m REGNO (sub) FIRST_PSEUDO_REGISTER REG_CANNOT_CHANGE_MODE_P (REGNO (sub), GET_MODE (sub), mode) GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_INT - GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_FLOAT) + GET_MODE_CLASS (GET_MODE (sub)) != MODE_COMPLEX_FLOAT + /* LRA can generate some invalid SUBREGS just for matched +operand reload presentation. LRA needs to treat them as +valid. */ + ! LRA_SUBREG_P (op)) return 0; #endif Index: lra.c === --- lra.c (revision 198251) +++ lra.c (working copy) @@ -2203,6 +2203,10 @@ lra (FILE *f) timevar_push (TV_LRA); + /* Make sure that the last insn is a note. Some subsequent passes + need it. */ + emit_note (NOTE_INSN_DELETED); + COPY_HARD_REG_SET (lra_no_alloc_regs, ira_no_alloc_regs); init_reg_info (); @@ -2259,6 +2263,11 @@ lra (FILE *f) bitmap_initialize (lra_split_regs, reg_obstack); bitmap_initialize (lra_optional_reload_pseudos, reg_obstack); live_p = false; + if (get_frame_size () != 0 crtl-stack_alignment_needed) +/* If we have a stack frame, we must align it now. 
The stack size + may be a part of the offset computation for register + elimination. */ +assign_stack_local (BLKmode, 0, crtl-stack_alignment_needed); for (;;) { for (;;) Index: lra-spills.c === --- lra-spills.c(revision 198251) +++
Re: [cxx-conversion] RFC - Helper types for building GIMPLE
On 2013-04-19 07:30 , Richard Biener wrote: On Tue, 16 Apr 2013, Diego Novillo wrote: Thanks for the feedback, folks. I've removed the builder type and added some overloads to simplify the creation of gimple assignments. I have only added exactly the functions I needed to simplify a bit of gcc/asan.c. I plan to continue adding and tweaking as I change client code. Some things to note: - The builder type gave us some more abstraction that would be nice to put in gimple itself. However, gimple is now a union and gimple_seq is just a typedef for gimple. So, adding behaviour to them will need to wait until we convert gimple into a proper class. - This variant does not yield as much code savings as the original one, but it can be improved once gimple is a proper class. - I will continue working in trunk. This is not something that can be easily done in a branch since I will be touching a whole bunch of client code and I expect to make incremental changes for the next little while. Tested on x86_64. Comments inoine below. 2013-04-16 Diego Novillo dnovi...@google.com * gimple.c (create_gimple_tmp): New. (get_expr_type): New. (build_assign): New. (build_type_cast): New. * gimple.h (enum ssa_mode): Define. (gimple_seq_set_location): New. * asan.c (build_check_stmt): Change some gimple_build_* calls to use build_assign and build_type_cast. commit a9c165448358a920e5756881e016865a812a5d81 Author: Diego Novillo dnovi...@google.com Date: Tue Apr 16 14:29:09 2013 -0400 Simplified GIMPLE IL builder functions. diff --git a/gcc/gimple.c b/gcc/gimple.c index 8bd80c8..64f7b1a 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -4207,4 +4207,105 @@ gimple_asm_clobbers_memory_p (const_gimple stmt) return false; } + + +/* Create and return an unnamed temporary. MODE indicates whether + this should be an SSA or NORMAL temporary. TYPE is the type to use + for the new temporary. */ + +tree +create_gimple_tmp (tree type, enum ssa_mode mode) +{ + return (mode == M_SSA) + ? 
make_ssa_name (type, NULL) + : create_tmp_var (type, NULL); +} Eh - what exactly is this for? It doesn't simplify anything! Helper for the other builders. Should be private to gimple.c. + +/* Return the expression type to use based on the CODE and type of + the given operand OP. If the expression CODE is a comparison, + the returned type is boolean_type_node. Otherwise, it returns + the type of OP. */ + +static tree +get_expr_type (enum tree_code code, tree op) +{ + return (TREE_CODE_CLASS (code) == tcc_comparison) + ? boolean_type_node + : TREE_TYPE (op); +} Which returns wrong results for FIX_TRUNC_EXPR and a double op. This function cannot be implemented correctly with the given signature (read: the type of the expression cannot be determined by just looking at 'code' and 'op' in all cases). Drop it. Hmm, yeah. I will. This 'mode' thingie is broken. Make that beast auto-detected (gimple_in_ssa_p is_gimple_reg_type). I don't like that. This would make it context sensitive. We should move away from these magic globals. Why not start simple and continue my overloading patches (which dropped gimple_build_assign_with_ops3) to make all the gimple stmt builders overloads of a single gimple_build_assing () ? That was kind of the idea. But I started at a different place. I'll keep adding overloads and converting more client code. Do it incrementally though, as I expect that with each new overload one function goes away and users are adjusted. That way you also get testing coverage - which your patch has none. Oh, it does. All the asan tests exercise it. +/* Create and return a type cast assignment. This creates a NOP_EXPR + that converts OP to TO_TYPE. 
*/ + +gimple +build_type_cast (tree to_type, tree op, enum ssa_mode mode) +{ + tree lhs = create_gimple_tmp (to_type, mode); + return gimple_build_assign_with_ops (NOP_EXPR, lhs, op, NULL_TREE); +} + +gimple +build_type_cast (tree to_type, gimple op, enum ssa_mode mode) +{ + return build_type_cast (to_type, gimple_assign_lhs (op), mode); +} I'd say it should be tree gimple_convert (gimple_stmt_iterator *gsi, tree type, tree op) which converts op to type, returning either 'op' (it's type is compatible to 'type') or a new register temporary (please ICE on !is_gimple_reg_type converts!) which initialization is inserted at gsi (eventually needs an extra param for the iterator update kind - unless we standardize on GSI_CONTINUE_LINKING for all 'builders' which would make sense). This gimple_convert should be used to replace all fold_convert calls in the various passes (well, those that end up re-gimplifying the result). Why so many side-effects? The reason I'm returning gimple is so that it can be used as an argument in more builders. They use the LHS. bool gimple_val_nonnegative_real_p (tree); + + +/* Set the location of all statements in
Re: patch to fix constant math -5th patch, rtl
Richard Biener richard.guent...@gmail.com writes: On Wed, Apr 24, 2013 at 5:00 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Biener richard.guent...@gmail.com writes: On Wed, Apr 24, 2013 at 4:29 PM, Richard Sandiford rdsandif...@googlemail.com wrote: In other words, one of the reasons wide_int can't be exactly 1:1 in practice is because it is clearing out these mistakes (GEN_INT rather than gen_int_mode) and missing features (non-power-of-2 widths). Note that the argument should be about CONST_WIDE_INT here, not wide-int. Indeed CONST_WIDE_INT has the desired feature and can be properly truncated/extended according to mode at the time we build it via immed_wide_int_cst (w, mode). I don't see the requirement that wide-int itself is automagically providing that truncation/extension (though it is a possibility, one that does not match existing behavior of HWI for CONST_INT or double-int for CONST_DOUBLE). I agree it doesn't match the existing behaviour of HWI for CONST_INT or double-int for CONST_DOUBLE, but I think that's very much a good thing. The model for HWIs at the moment is that you have to truncate results to the canonical form after every operation where it matters. As you proved in your earlier message about the plus_constant bug, that's easily forgotten. I don't think the rtl code is doing all CONST_INT arithmetic on full HWIs because it wants to: it's doing it because that's the way C/C++ arithmetic on primitive types works. In other words, the current CONST_INT code is trying to emulate N-bit arithmetic (for gcc runtime N) using a single primitive integer type. wide_int gives us N-bit arithmetic directly; no emulation is needed. Ok, so what wide-int provides is integer values encoded in 'len' HWI words that fit in 'precision' or more bits (and often in less). wide-int also provides N-bit arithmetic operations. IMHO both are tied too closely together. A give constant doesn't really have a precision. I disagree. 
All rtl objects have a precision. REGs, MEMs, SYMBOL_REFs, LABEL_REFs and CONSTs all have precisions, and the last three are run-time constants. Why should CONST_INT and CONST_DOUBLE be different? See e.g. the hoops that cselib has to jump through: /* We need to pass down the mode of constants through the hash table functions. For that purpose, wrap them in a CONST of the appropriate mode. */ static rtx wrap_constant (enum machine_mode mode, rtx x) { if ((!CONST_SCALAR_INT_P (x)) GET_CODE (x) != CONST_FIXED) return x; gcc_assert (mode != VOIDmode); return gen_rtx_CONST (mode, x); } That is, cselib locally converts (const_int X) into (const:M (const_int X)), purely so that it doesn't lose track of the CONST_INT's mode. (const:M (const_int ...)) is invalid rtl elsewhere, but a necessary hack here all the same. What RTL currently has looks better to me - operations have explicitely specified precisions. But that isn't enough to determine the precision of all operands. A classic case is ZERO_EXTEND. Something like: (zero_extend:DI (reg:SI X)) is unambiguous. But if you substitute (reg:SI X) with a CONST_INT, the result becomes ambiguous. E.g. we could end up with: (zero_extend:DI (const_int -1)) The ZERO_EXTEND operand still has SImode, but that fact is not explicit in the rtl, and is certainly not explicit in the ZERO_EXTEND operation. So if we just see the result above, we no longer know whether the result should be (const_int 0xff), (const_int 0x), or what. The same goes for: (zero_extend:DI (const_int 256)) where (const_int 0) and (const_int 256) are both potential results. It's not just ZERO_EXTEND. E.g.: (zero_extract:SI ...) tells you that an SImode value is being extracted, but it doesn't tell you what precision you're extracting from. So for: (zero_extract:SI (const_int -1) (const_int X) (const_int 3)) how many 1 bits should be the result have? 
Because of the sign-extension canonicalisation, the answer depends on the precision of the (const_int -1), which has now been lost. If instead CONST_INTs were stored in zero-extended form, the same ambiguity would apply to SIGN_EXTRACT. This sort of thing has been a constant headache in rtl. I can't stress how much I feel it is _not_ better than recording the precision of the constant :-) Richard
Re: [C++ Patch] Add __GXX_EXPERIMENTAL_CXX1Y__
I would really rather avoid introducing another macro to be removed again later. Instead, let's use a value of __cplusplus greater than 201103L, perhaps 201300? Jason
C++ PATCH for c++/53721 (this in trailing-return-type)
We were already trying to support the rule that *this doesn't need to be complete for class member access, but we were comparing the wrong thing to current_class_ref. Tested x86_64-pc-linux-gnu, applying to trunk. commit 460a34462cd0dd2787e1ffddf4b08d1c36ff5557 Author: Jason Merrill ja...@redhat.com Date: Wed Apr 24 11:30:03 2013 -0400 PR c++/53721 * parser.c (cp_parser_postfix_dot_deref_expression): Fix thinko. diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index cb26292..2239a07 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -6155,7 +6155,7 @@ cp_parser_postfix_dot_deref_expression (cp_parser *parser, /* Unlike the object expression in other contexts, *this is not required to be of complete type for purposes of class member access (5.2.5) outside the member function body. */ - else if (scope != current_class_ref + else if (postfix_expression != current_class_ref !(processing_template_decl scope == current_class_type)) scope = complete_type_or_else (scope, NULL_TREE); /* Let the name lookup machinery know that we are processing a diff --git a/gcc/testsuite/g++.dg/cpp0x/decltype51.C b/gcc/testsuite/g++.dg/cpp0x/decltype51.C new file mode 100644 index 000..9ab4146 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/decltype51.C @@ -0,0 +1,10 @@ +// PR c++/53721 +// { dg-require-effective-target c++11 } + +struct A +{ + void f() {}; + auto g() - decltype(this-f()) + { + } +};
Re: [C++ Patch] Add __GXX_EXPERIMENTAL_CXX1Y__
On Wed, Apr 24, 2013 at 11:26 AM, Jason Merrill ja...@redhat.com wrote: I would really rather avoid introducing another macro to be removed again later. Instead, let's use a value of __cplusplus greater than 201103L, perhaps 201300? Jason yes, that makes sense, and even a better path forward. Hopefully, the next committee draft will have that value. -- Gaby
Re: vtables patch 1/3: allow empty array initializations
On 04/24/2013 05:10 PM, DJ Delorie wrote: this isn't true, it would be helpful to know. Given the size_t/uintptr_t testsuite patch I just submitted I'm thinking they don't exist, but I'm kind of wondering about m32c, so Cc'ing DJ. For m32c-elf with -mcpu=m32c, size_t is 16 bits but void* is 24 bits. 24 bits stored as three bytes, or four? How does this affect vtable layout? I would have expected the C++ frontend and libsupc++ to currently be inconsistent with each other given such a setup. Bernd
Re: [C++ Patch] Add __GXX_EXPERIMENTAL_CXX1Y__
On 04/24/2013 12:48 PM, Gabriel Dos Reis wrote: On Wed, Apr 24, 2013 at 11:26 AM, Jason Merrill ja...@redhat.com wrote: I would really rather avoid introducing another macro to be removed again later. Instead, let's use a value of __cplusplus greater than 201103L, perhaps 201300? yes, that makes sense, and even a better path forward. Hopefully, the next committee draft will have that value. We won't have a value for the next standard until we have a next standard, so let's just invent a value for now; presumably people will know better than to check for that invented value specifically. Jason
Re: [C++ Patch] Add __GXX_EXPERIMENTAL_CXX1Y__
Hi, On 04/24/2013 06:55 PM, Jason Merrill wrote: On 04/24/2013 12:48 PM, Gabriel Dos Reis wrote: On Wed, Apr 24, 2013 at 11:26 AM, Jason Merrill ja...@redhat.com wrote: I would really rather avoid introducing another macro to be removed again later. Instead, let's use a value of __cplusplus greater than 201103L, perhaps 201300? yes, that makes sense, and even a better path forward. Hopefully, the next committee draft will have that value. We won't have a value for the next standard until we have a next standard, so let's just invent a value for now; presumably people will know better than to check for that invented value specifically. This invention scared me, that's why I didn't even propose it. Anyway, for the time being I'm going to simply revert what I committed earlier today. It would be great if somebody can implement the __cplusplus change as soon as possible. Thanks! Paolo.
[Patch][google/gcc-4_8] Fix arm build broken
ARM build (on chrome) is broken. This patch fixed the problem. Tested by building arm cross compiler successfully. * gcc/config/config.cc: Removed duplicated header files which causes error in generating gtyp-input.list. * gcc/config/arm.md: Most of define_c_enum unspec entries already defined in undefs.md; all define_c_enum unspecv entries already defined in undefs.md; merged these 2 enum definitions with unspecs.md and removed them from arm.md. * gcc/config/unspecs.md: Merged several define_c_enum unspec entries from arm.md. Ok for branch google/gcc-4_8? H. patch Description: Binary data
RTL gensupport - fix warning when using a match_scratch predicate
All, Currently using the match_scratch predicate on a destination operand will trigger the warning: warning: destination operand 0 allows non-lvalue. This happens because add_predicate_code() will set pred->allows_non_lvalue when it encounters a SCRATCH rtx code, which subsequently triggers the above genrecog warning. Graham gcc/ChangeLog * gensupport.c: (add_predicate_code) Also exclude SCRATCH from rtx codes which allow non-lvalues. Index: gcc/gensupport.c = --- gcc/gensupport.c   (revision 198200) +++ gcc/gensupport.c   (working copy) @@ -2732,7 +2732,8 @@  code != MEM && code != CONCAT && code != PARALLEL - && code != STRICT_LOW_PART) + && code != STRICT_LOW_PART + && code != SCRATCH)    pred->allows_non_lvalue = true;   if (pred->num_codes == 1)
[C++ Patch] Define __cplusplus == 201300L for -std=c++1y
Hi again ;) thus I'm finishing testing (past g++.dg/dg.exp) the below. I added proper GNUCXX1Y and CXX1Y modes and then the rest seems largely straightforward. Ok if testing passes? Thanks, Paolo. /// /libcpp 2013-04-24 Paolo Carlini paolo.carl...@oracle.com * include/cpplib.h (enum c_lang): Add CLK_GNUCXX1Y and CLK_CXX1Y. * init.c (lang_defaults): Add defaults for the latter. (cpp_init_builtins): Define __cplusplus as 201300L for the latter. * lex.c (_cpp_lex_direct): Update. /gcc/c-family 2013-04-24 Paolo Carlini paolo.carl...@oracle.com * c-opts.c (set_std_cxx11): Use CLK_CXX1Y and CLK_GNUCXX1Y. /gcc/testsuite 2013-04-24 Paolo Carlini paolo.carl...@oracle.com * g++.dg/cpp1y/cplusplus.C: Add. Index: gcc/c-family/c-opts.c === --- gcc/c-family/c-opts.c (revision 198258) +++ gcc/c-family/c-opts.c (working copy) @@ -1471,7 +1471,7 @@ set_std_cxx11 (int iso) static void set_std_cxx1y (int iso) { - cpp_set_lang (parse_in, iso ? CLK_CXX11: CLK_GNUCXX11); + cpp_set_lang (parse_in, iso ? CLK_CXX1Y: CLK_GNUCXX1Y); flag_no_gnu_keywords = iso; flag_no_nonansi_builtin = iso; flag_iso = iso; Index: gcc/testsuite/g++.dg/cpp1y/cplusplus.C === --- gcc/testsuite/g++.dg/cpp1y/cplusplus.C (revision 0) +++ gcc/testsuite/g++.dg/cpp1y/cplusplus.C (working copy) @@ -0,0 +1,5 @@ +// { dg-options -std=c++1y } + +#if __cplusplus 201300L +#error +#endif Index: libcpp/include/cpplib.h === --- libcpp/include/cpplib.h (revision 198258) +++ libcpp/include/cpplib.h (working copy) @@ -165,7 +165,8 @@ enum cpp_ttype /* C language kind, used when calling cpp_create_reader. */ enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC11, CLK_STDC89, CLK_STDC94, CLK_STDC99, CLK_STDC11, -CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX11, CLK_CXX11, CLK_ASM}; +CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX11, CLK_CXX11, +CLK_GNUCXX1Y, CLK_CXX1Y, CLK_ASM}; /* Payload of a NUMBER, STRING, CHAR or COMMENT token. 
*/ struct GTY(()) cpp_string { Index: libcpp/init.c === --- libcpp/init.c (revision 198258) +++ libcpp/init.c (working copy) @@ -98,11 +98,13 @@ static const struct lang_flags lang_defaults[] = /* CXX98*/ { 0, 1, 1, 0, 1, 1, 1, 0, 0,0 }, /* GNUCXX11 */ { 1, 1, 1, 0, 0, 1, 1, 1, 1,1 }, /* CXX11*/ { 1, 1, 1, 0, 1, 1, 1, 1, 1,1 }, + /* GNUCXX1Y */ { 1, 1, 1, 0, 0, 1, 1, 1, 1,1 }, + /* CXX1Y*/ { 1, 1, 1, 0, 1, 1, 1, 1, 1,1 }, /* ASM */ { 0, 0, 1, 0, 0, 1, 0, 0, 0,0 } - /* xid should be 1 for GNUC99, STDC99, GNUCXX, CXX98, GNUCXX11, and - CXX11 when no longer experimental (when all uses of identifiers - in the compiler have been audited for correct handling of - extended identifiers). */ + /* xid should be 1 for GNUC99, STDC99, GNUCXX, CXX98, GNUCXX11, CXX11, + GNUCXX1Y, and CXX1Y when no longer experimental (when all uses of + identifiers in the compiler have been audited for correct handling + of extended identifiers). */ }; /* Sets internal flags correctly for a given language. */ @@ -476,8 +478,11 @@ cpp_init_builtins (cpp_reader *pfile, int hosted) if (CPP_OPTION (pfile, cplusplus)) { - if (CPP_OPTION (pfile, lang) == CLK_CXX11 - || CPP_OPTION (pfile, lang) == CLK_GNUCXX11) + if (CPP_OPTION (pfile, lang) == CLK_CXX1Y + || CPP_OPTION (pfile, lang) == CLK_GNUCXX1Y) + _cpp_define_builtin (pfile, __cplusplus 201300L); + else if (CPP_OPTION (pfile, lang) == CLK_CXX11 + || CPP_OPTION (pfile, lang) == CLK_GNUCXX11) _cpp_define_builtin (pfile, __cplusplus 201103L); else _cpp_define_builtin (pfile, __cplusplus 199711L); Index: libcpp/lex.c === --- libcpp/lex.c(revision 198258) +++ libcpp/lex.c(working copy) @@ -2298,8 +2298,8 @@ _cpp_lex_direct (cpp_reader *pfile) is neither : nor , the is treated as a preprocessor token by itself. 
*/ if (CPP_OPTION (pfile, cplusplus) - (CPP_OPTION (pfile, lang) == CLK_CXX11 - || CPP_OPTION (pfile, lang) == CLK_GNUCXX11) + CPP_OPTION (pfile, lang) != CLK_CXX98 + CPP_OPTION (pfile, lang) != CLK_GNUCXX buffer-cur[1] == ':' buffer-cur[2] != ':' buffer-cur[2] != '') break;
Re: [C++ Patch] Add __GXX_EXPERIMENTAL_CXX1Y__
On Wed, Apr 24, 2013 at 11:55 AM, Jason Merrill ja...@redhat.com wrote: On 04/24/2013 12:48 PM, Gabriel Dos Reis wrote: On Wed, Apr 24, 2013 at 11:26 AM, Jason Merrill ja...@redhat.com wrote: I would really rather avoid introducing another macro to be removed again later. Instead, let's use a value of __cplusplus greater than 201103L, perhaps 201300? yes, that makes sense, and even a better path forward. Hopefully, the next committee draft will have that value. We won't have a value for the next standard until we have a next standard, so let's just invent a value for now; presumably people will know better than to check for that invented value specifically. Jason We can always try to convince the project editor to honor a tradition from the past where the editor would set the value of __cplusplus to a value that reflects the approval date of a working draft. This is purely editorial and he could exercise that discretion to help implementors do the right thing. That way, we don't have to wait to have a standard. Of course, programmers should not test for equality of that value. -- Gaby
Re: [C++ Patch] Define __cplusplus == 201300L for -std=c++1y
On 04/24/2013 02:02 PM, Paolo Carlini wrote: +#if __cplusplus 201300L Don't test for this value. Use = 201103L instead. OK with that change. Jason
Re: [PATCH, PR 10474] Shedule pass_cprop_hardreg before pass_thread_prologue_and_epilogue
Hi, On Fri, Apr 19, 2013 at 09:27:28AM -0600, Jeff Law wrote: On 04/18/2013 05:08 PM, Martin Jambor wrote: On Fri, Apr 19, 2013 at 12:37:58AM +0200, Steven Bosscher wrote: On Fri, Apr 19, 2013 at 12:09 AM, Martin Jambor wrote: I also have not tried scheduling the hard register copy propagation pass twice and measuring the impact on compile times. Any suggestion what might be a good testcase for that? I think a better question is when this would be useful in the first place, and why. In other words: If you propagate hardregs before shrink wrapping, what could be a source of new opportunities after shrink wrapping? Yes, we also did that and neither I nor Honza could think of any potential problems there. And of course, I'd also measure how many statements the second run of the pass changed. I'll probably do that tomorrow anyway. I'd be very curious to see those numbers. While I tend to think the opportunities missed by just running it early will be in the noise and nothing we can or should do anything about given the compile-time cost of running it twice. However, experience has shown it's worth doing the investigative work to be sure. Here they are. First, I simply looked at how many instructions would be changed by a second run of the pass in its current position during C and C++ bootstrap: | | Insns changed | % | |-+---+| | Trunk - only pass in original place |172608 | 100.00 | | First pass before pro/eipilogue |170322 | 98.68 | | Second pass in the original place | 8778 | 5.09 | 5% was worth investigating more. 
The 20 source files with highest number of affected instructions by the second run were: 939 mine/src/libgcc/config/libbid/bid_binarydecimal.c 909 mine/src/libgcc/config/libbid/bid128_div.c 813 mine/src/libgcc/config/libbid/bid64_div.c 744 mine/src/libgcc/config/libbid/bid128_compare.c 615 mine/src/libgcc/config/libbid/bid128_to_int32.c 480 mine/src/libgcc/config/libbid/bid128_to_int64.c 450 mine/src/libgcc/config/libbid/bid128_to_uint32.c 408 mine/src/libgcc/config/libbid/bid128_fma.c 354 mine/src/libgcc/config/libbid/bid128_to_uint64.c 327 mine/src/libgcc/config/libbid/bid128_add.c 246 mine/src/libgcc/libgcc2.c 141 mine/src/libgcc/config/libbid/bid_round.c 129 mine/src/libgcc/config/libbid/bid64_mul.c 117 mine/src/libgcc/config/libbid/bid64_to_int64.c 96 mine/src/libsanitizer/tsan/tsan_interceptors.cc 96 mine/src/libgcc/config/libbid/bid64_compare.c 87 mine/src/libgcc/config/libbid/bid128_noncomp.c 84 mine/src/libgcc/config/libbid/bid64_to_bid128.c 81 mine/src/libgcc/config/libbid/bid64_to_uint64.c 63 mine/src/libgcc/config/libbid/bid64_to_int32.c I have manually examined some of the late opportunities for propagation in mine/src/libgcc/config/libbid/bid_binarydecimal.c and majority of them was a result of peephole2. Still, the list of files showed that the config sources of libraries which might have been built too many times (I so not know how many but for example I had multilib allowed which changes things a lot) probably skew the numbers a lot. So next time I measured only the number of instructions changed during make stage2-bubble with multilib disabled. 
In order to find out where do the new opportunities come from, I added scheduled pass_cprop_hardreg after every pass between pass_branch_target_load_optimize1 and pass_fast_rtl_dce and counted how many instructions are modified (relative to just having the pass where it is now): | | Insns changed | % | |--+---+| | Trunk - only pass in original place | 76225 | 100.00 | |--+---+| | Before pro/eipilogue | 77906 | 102.21 | | After pro/eipilogue | 267 | 0.35 | | After pass_rtl_dse2 | 0 | 0.00 | | After pass_stack_adjustments | 0 | 0.00 | | After pass_jump2 | 372 | 0.49 | | After pass_peephole2 | 119 | 0.16 | | After pass_if_after_reload |37 | 0.05 | | After pass_regrename - original position | 0 | 0.00 | Which seems much better. The 12 source files with most instructions changed now were: 116 src/libgcc/libgcc2.c 64 src/libsanitizer/tsan/tsan_interceptors.cc 36 src/libsanitizer/tsan/tsan_fd.cc 31 src/gcc/cp/parser.c 20 cp-demangle.c 19 src/libiberty/cp-demangle.c 12 gtype-desc.c 12 src/libgcc/unwind-dw2.c 11
PATCH: Add -mx32 to x86-64 ASM_SPEC
When GCC is configured for x86_64-elf for embedded target, --x32 isn't passed to assembler for gcc -mx32. This patch adds -mx32 support to GCC assembler driver. OK for trunk? Thanks. H.J. --- 2013-04-24 H.J. Lu hongjiu...@intel.com * config/i386/x86-64.h (ASM_SPEC): Support -mx32. diff --git a/gcc/config/i386/x86-64.h b/gcc/config/i386/x86-64.h index c103c58..3363439 100644 --- a/gcc/config/i386/x86-64.h +++ b/gcc/config/i386/x86-64.h @@ -49,7 +49,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define WCHAR_TYPE_SIZE 32 #undef ASM_SPEC -#define ASM_SPEC %{m32:--32} %{m64:--64} +#define ASM_SPEC %{m32:--32} %{m64:--64} %{mx32:--x32} #undef ASM_OUTPUT_ALIGNED_BSS #define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
[patch] Hash table changes from cxx-conversion branch - config part
This patch is a consolodation of the hash_table patches to the cxx-conversion branch for files under gcc/config. Recipients: config/arm/arm.c - ni...@redhat.com, ramana.radhakrish...@arm.com config/ia64/ia64.c - wil...@tuliptree.org, sell...@mips.com config/mips/mips.c - rdsandif...@googlemail.com config/sol2.c - r...@cebitec.uni-bielefeld.de config/i386/winnt.c - c...@gcc.gnu.org, kti...@redhat.com global - rguent...@suse.de, dnovi...@google.com Update various hash tables from htab_t to hash_table. Modify types and calls to match. * config/arm/arm.c'arm_libcall_uses_aapcs_base::libcall_htab Fold libcall_eq and libcall_hash into new struct libcall_hasher. * config/ia64/ia64.c'bundle_state_table Fold bundle_state_hash and bundle_state_eq_p into new struct bundle_state_hasher. * config/mips/mips.c'mips_offset_table Fold mips_lo_sum_offset_hash and mips_lo_sum_offset_eq into new struct mips_lo_sum_offset_hasher. In mips_reorg_process_insns, change call to for_each_rtx to pass a pointer to the hash_table rather than a htab_t. This change requires then dereferencing that pointer in mips_record_lo_sum to obtain the hash_table. * config/sol2.c'solaris_comdat_htab Fold comdat_hash and comdat_eq into new struct comdat_entry_hasher. * config/i386/winnt.c'i386_pe_section_type_flags::htab * config/i386/winnt.c'i386_find_on_wrapper_list::wrappers Fold wrapper_strcmp into new struct wrapped_symbol_hasher. Tested on x86_64. Tested with config-list.mk. Index: gcc/ChangeLog 2013-04-24 Lawrence Crowl cr...@google.com * config/arm/t-arm: Update for below. * config/arm/arm.c (arm_libcall_uses_aapcs_base::libcall_htab): Change type to hash_table. Update dependent calls and types. * config/i386/t-cygming: Update for below. * config/i386/t-interix: Update for below. * config/i386/winnt.c (i386_pe_section_type_flags::htab): Change type to hash_table. Update dependent calls and types. (i386_find_on_wrapper_list::wrappers): Likewise. * config/ia64/t-ia64: Update for below. 
* config/ia64/ia64.c (bundle_state_table): Change type to hash_table. Update dependent calls and types. * config/mips/mips.c (mips_reorg_process_insns::htab): Change type to hash_table. Update dependent calls and types. * config/sol2.c (solaris_comdat_htab): Change type to hash_table. Update dependent calls and types. * config/t-sol2: Update for above. Index: gcc/config/ia64/ia64.c === --- gcc/config/ia64/ia64.c (revision 198213) +++ gcc/config/ia64/ia64.c (working copy) @@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. #include target-def.h #include common/common-target.h #include tm_p.h -#include hashtab.h +#include hash-table.h #include langhooks.h #include gimple.h #include intl.h @@ -257,8 +257,6 @@ static struct bundle_state *get_free_bun static void free_bundle_state (struct bundle_state *); static void initiate_bundle_states (void); static void finish_bundle_states (void); -static unsigned bundle_state_hash (const void *); -static int bundle_state_eq_p (const void *, const void *); static int insert_bundle_state (struct bundle_state *); static void initiate_bundle_state_table (void); static void finish_bundle_state_table (void); @@ -8528,18 +8526,21 @@ finish_bundle_states (void) } } -/* Hash table of the bundle states. The key is dfa_state and insn_num - of the bundle states. */ +/* Hashtable helpers. */ -static htab_t bundle_state_table; +struct bundle_state_hasher : typed_noop_remove bundle_state +{ + typedef bundle_state value_type; + typedef bundle_state compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); +}; /* The function returns hash of BUNDLE_STATE. 
*/ -static unsigned -bundle_state_hash (const void *bundle_state) +inline hashval_t +bundle_state_hasher::hash (const value_type *state) { - const struct bundle_state *const state -= (const struct bundle_state *) bundle_state; unsigned result, i; for (result = i = 0; i dfa_state_size; i++) @@ -8550,19 +8551,20 @@ bundle_state_hash (const void *bundle_st /* The function returns nonzero if the bundle state keys are equal. */ -static int -bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2) +inline bool +bundle_state_hasher::equal (const value_type *state1, + const compare_type *state2) { - const struct bundle_state *const state1 -= (const struct bundle_state *) bundle_state_1; - const struct bundle_state *const state2 -= (const struct bundle_state *) bundle_state_2; - return (state1-insn_num == state2-insn_num memcmp (state1-dfa_state, state2-dfa_state, dfa_state_size) == 0); } +/* Hash table of the bundle states. The key is
Re: vtables patch 1/3: allow empty array initializations
24 bits stored as three bytes, or four? How does this affect vtable layout? I would have expected the C++ frontend and libsupc++ to currently be inconsistent with each other given such a setup. In memory, four, I think. The address registers really are three bytes though. They're PSImode and gcc doesn't really have a good way of using any specified PSImode precision.
Re: PATCH: Add -mx32 to x86-64 ASM_SPEC
When GCC is configured for x86_64-elf for embedded target, --x32 isn't passed to assembler for gcc -mx32. This patch adds -mx32 support to GCC assembler driver. OK for trunk? OK, Thanks Honza
Re: [PATCH, PR 10474] Shedule pass_cprop_hardreg before pass_thread_prologue_and_epilogue
On 04/24/2013 12:24 PM, Martin Jambor wrote: Here they are. First, I simply looked at how many instructions would be changed by a second run of the pass in its current position during C and C++ bootstrap: | | Insns changed | % | |-+---+| | Trunk - only pass in original place |172608 | 100.00 | | First pass before pro/eipilogue |170322 | 98.68 | | Second pass in the original place | 8778 | 5.09 | 5% was worth investigating more. The 20 source files with highest number of affected instructions by the second run were: 939 mine/src/libgcc/config/libbid/bid_binarydecimal.c 909 mine/src/libgcc/config/libbid/bid128_div.c 813 mine/src/libgcc/config/libbid/bid64_div.c 744 mine/src/libgcc/config/libbid/bid128_compare.c 615 mine/src/libgcc/config/libbid/bid128_to_int32.c 480 mine/src/libgcc/config/libbid/bid128_to_int64.c 450 mine/src/libgcc/config/libbid/bid128_to_uint32.c 408 mine/src/libgcc/config/libbid/bid128_fma.c 354 mine/src/libgcc/config/libbid/bid128_to_uint64.c 327 mine/src/libgcc/config/libbid/bid128_add.c 246 mine/src/libgcc/libgcc2.c 141 mine/src/libgcc/config/libbid/bid_round.c 129 mine/src/libgcc/config/libbid/bid64_mul.c 117 mine/src/libgcc/config/libbid/bid64_to_int64.c 96 mine/src/libsanitizer/tsan/tsan_interceptors.cc 96 mine/src/libgcc/config/libbid/bid64_compare.c 87 mine/src/libgcc/config/libbid/bid128_noncomp.c 84 mine/src/libgcc/config/libbid/bid64_to_bid128.c 81 mine/src/libgcc/config/libbid/bid64_to_uint64.c 63 mine/src/libgcc/config/libbid/bid64_to_int32.c The first thing that jumps out at me here is there's probably some idiom used in the BID code that is triggering. I have manually examined some of the late opportunities for propagation in mine/src/libgcc/config/libbid/bid_binarydecimal.c and majority of them was a result of peephole2. I can pretty easily see how peep2 may expose opportunities for hard-cprop. Of course, those opportunities may actually be undoing some of the benefit of the peep2 patterns. 
So next time I measured only the number of instructions changed during make stage2-bubble with multilib disabled. In order to find out where do the new opportunities come from, I added scheduled pass_cprop_hardreg after every pass between pass_branch_target_load_optimize1 and pass_fast_rtl_dce and counted how many instructions are modified (relative to just having the pass where it is now): Thanks. That's a real interesting hunk of data. Interesting that we have so many after {pro,epi}logue generation, a full 33% of the changed insns stem from here and I can't think of why that should be the case. Perhaps there's some second order effect that shows itself after the first pass of cprop-hardreg. I can see several ways jump2 could open new propagation possibilities. As I noted earlier in this message, the opportunities after peep2 may actually be doing more harm than good. It's probably not worth the work involved, but a more sensible visitation order for reg-cprop would probably be good. Similarly we could have the capability to mark interesting blocks and just reg-cprop the interesting blocks after threading the prologue/epilogue. I'm not sure what the conclusion is. Probably that there are cases where doing propagation late can be a good thing but these do not occur that often. And that more measurements should probably be done. Anyway, I'll look into alternatives before (see below) pushing this further. Knowing more about those opportunities would be useful. The most interesting ones to me would be those right after the prologue/epilogue. Having just run the cprop, then attached the prologue/epilogue, I wouldn't expect there to be many propagation opportunities. I have looked at the patch Vlad suggested (most things are new to me in RTL land and so almost everything takes me ages) and I'm certainly willing to try and mimic some of it in order to (hopefully) get the same effect that propagating and shrink-wrapping preparation moves can do. 
Yes, this is not enough to deal with parameters loaded from stack but unlike latest insertion, it could also work when the parameters are also used on the fast path, which is often the case. In fact, propagation helps exactly because they are used in the entry BB. Hopefully they will end up in a caller-saved register on the fast path and we'll flip it over to the callee-saved problematic one only on (slow) paths going through calls. Of course, the two approaches are not mutually exclusive and load sinking might help too. Note that sinking copies is formulated as sink copies one at a time in Morgan's text. Not sure that's needed in this case since we're just sinking a few, well defined copies.
Re: RFA: enable LRA for rs6000 [patch for WRF]
I'm seeing a lot of failures with these changes in make check. The first two that I noticed on a build that did not use --with-cpu=power7: 1) c-c++-common/dfp/call-by-value.c (and others in the directory) fails with -O0 for all targets before power7 because it can't spill SDmode. Note, in the earlier targets, we need to have a wider spill slot because we don't have 32-bit integer load/store to the FPR registers. FAIL: c-c++-common/dfp/call-by-value.c (internal compiler error) FAIL: c-c++-common/dfp/call-by-value.c (test for excess errors) Excess errors: /home/meissner/fsf-src/meissner-lra/gcc/testsuite/c-c++-common/dfp/call-by-value.c:43:1: internal compiler error: in assign_by_spills, at lra-assigns.c:1268 0x104ceff7 assign_by_spills /home/meissner/fsf-src/meissner-lra/gcc/lra-assigns.c:1268 0x104cfe43 lra_assign() /home/meissner/fsf-src/meissner-lra/gcc/lra-assigns.c:1425 0x104ca837 lra(_IO_FILE*) /home/meissner/fsf-src/meissner-lra/gcc/lra.c:2309 0x1047d6eb do_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4619 0x1047d6eb rest_of_handle_reload /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4731 2) A lot of fortran tests are failing for all optimization levels due to segmentation violations at runtime: AIL: gfortran.dg/advance_1.f90 -O0 execution test Executing on host: /home/meissner/fsf-build-ppc64/meissner-lra/gcc/testsuite/gfortran1/../../gfortran -B/home/meissner/fsf-build-ppc64/meissner-lra/gcc/testsuite/gfortran1/../../ -B/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unkno wn-linux-gnu/./libgfortran/ /home/meissner/fsf-src/meissner-lra/gcc/testsuite/gfortran.dg/advance_1.f90 -fno-diagnostics-show-caret -O1 -pedantic-errors -B/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libg fortran/.libs -L/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs -L/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs -lm -m32 -o ./advance_1.exe(t imeout = 
300) spawn /home/meissner/fsf-build-ppc64/meissner-lra/gcc/testsuite/gfortran1/../../gfortran -B/home/meissner/fsf-build-ppc64/meissner-lra/gcc/testsuite/gfortran1/../../ -B/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/ ./libgfortran/ /home/meissner/fsf-src/meissner-lra/gcc/testsuite/gfortran.dg/advance_1.f90 -fno-diagnostics-show-caret -O1 -pedantic-errors -B/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs -L/ho me/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs -L/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs -lm -m32 -o ./advance_1.exe PASS: gfortran.dg/advance_1.f90 -O1 (test for excess errors) Setting LD_LIBRARY_PATH to .:/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs:/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs:/home/meissner/fsf-build- ppc64/meissner-lra/gcc:/home/meissner/fsf-build-ppc64/meissner-lra/gcc/32:.:/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu/32/libgfortran/.libs:/home/meissner/fsf-build-ppc64/meissner-lra/powerpc64-unknown-linux-gnu /32/libgfortran/.libs:/home/meissner/fsf-build-ppc64/meissner-lra/gcc:/home/meissner/fsf-build-ppc64/meissner-lra/gcc/32:/home/meissner/tools/ppc64/lib:/home/meissner/tools/ppc32/lib:/home/meissner/tools-binutils/ppc64/lib:/home/meissner/to ols-binutils/ppc32/lib spawn [open ...] Program received signal SIGSEGV: Segmentation fault - invalid memory reference. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Re: patch for latest lra changes.
Hi, On 04/24/2013 05:49 PM, Vladimir Makarov wrote: The following patch incorporates some LRA changes on lra and mike-lra branches. I am committing them to get a better testing of them for x86/x86-64 on trunk. Just in case nobody reported it already, when the patch went in these regressions appeared on x86_64-linux: http://gcc.gnu.org/ml/gcc-regression/2013-04/msg00344.html Thanks, Paolo.
[Patch, bootstrap] PR 57028 Fortran bootstrap failure wrt zlib
Hi, the attached patch hopefully fixes the PR. Regtested on x86_64-unknown-linux-gnu, Ok for trunk? 2013-04-24 Janne Blomqvist j...@gcc.gnu.org PR bootstrap/57028 * Make-lang.in (f951): Link in ZLIB. (CFLAGS-module.o): Add zlib include directory. -- Janne Blomqvist fortzlib.diff Description: Binary data
patch to fix pr57046
The following patch fixes http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57046 The patch was successfully bootstrapped and tested on x86/x86-64. Committed as rev. 198263. 2013-04-24 Vladimir Makarov vmaka...@redhat.com PR rtl-optimizations/57046 * lra-constraints (split_reg): Set up lra_risky_transformations_p for multi-reg splits. 2013-04-24 Vladimir Makarov vmaka...@redhat.com PR rtl-optimizations/57046 * gcc.target/i386/pr57046.c: New test. Index: lra-constraints.c === --- lra-constraints.c (revision 198252) +++ lra-constraints.c (working copy) @@ -4198,7 +4198,7 @@ split_reg (bool before_p, int original_r { enum reg_class rclass; rtx original_reg; - int hard_regno; + int hard_regno, nregs; rtx new_reg, save, restore, usage_insn; bool after_p; bool call_save_p; @@ -4208,10 +4208,12 @@ split_reg (bool before_p, int original_r rclass = ira_allocno_class_translate[REGNO_REG_CLASS (original_regno)]; hard_regno = original_regno; call_save_p = false; + nregs = 1; } else { hard_regno = reg_renumber[original_regno]; + nregs = hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (original_regno)]; rclass = lra_get_allocno_class (original_regno); original_reg = regno_reg_rtx[original_regno]; call_save_p = need_for_call_save_p (original_regno); @@ -4324,6 +4326,13 @@ split_reg (bool before_p, int original_r before_p ? NULL_RTX : save, call_save_p ? Add save-reg : Add split-reg); + if (nregs 1) +/* If we are trying to split multi-register. We should check + conflicts on the next assignment sub-pass. IRA can allocate on + sub-register levels, LRA do this on pseudos level right now and + this discrepancy may create allocation conflicts after + splitting. 
*/ +lra_risky_transformations_p = true; if (lra_dump_file != NULL) fprintf (lra_dump_file, \n); Index: testsuite/gcc.target/i386/pr57046.c === --- testsuite/gcc.target/i386/pr57046.c (revision 0) +++ testsuite/gcc.target/i386/pr57046.c (working copy) @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-options -O2 } */ + +struct emac { +unsigned reg[23]; +}; + +struct mop { +unsigned long long addr; +unsigned int size; +}; + +unsigned int __attribute__((__noinline__)) +level(const struct emac *obj) +{ + return 0; +} + +void __attribute__((__noinline__)) +info(struct emac *dev, unsigned long long addr) +{ + asm( : : : memory); +} + +unsigned long long __attribute__((__noinline__)) +get_value(const struct mop *mop) +{ +return 0x1234567890abcdefull; +} + +int __attribute__((__noinline__)) +emac_operation(struct emac *obj, struct mop *mop) +{ +unsigned long long addr = mop-addr; +int index = addr 2; + unsigned int value, old_value; + +if (mop-size != 4) +return 0; + +if (index = 23) { +if (level(obj) = 1) +info(obj, addr); +return 0; +} + +value = get_value(mop); +old_value = obj-reg[index]; + +info(obj, 0); + +switch (index) { +case 0: +obj-reg[0] = old_value; +break; +case 7: +case 8: +obj-reg[index] = value; +break; +} + +return 0; +} + +int main(void) +{ + struct emac e = { { 0 } }; + struct mop mop = { 32, 4 }; + + e.reg[8] = 0xdeadbeef; + emac_operation(e, mop); + + if (e.reg[8] != 0x90abcdef) + __builtin_abort(); + + return 0; +}
Re: RTL gensupport - fix warning when using a match_scratch predicate
On 04/24/2013 11:41 AM, Graham Stott wrote: All Currently using the match_scratch predicate on a destination operand will trigger the warning warning: destination operand 0 allows non-lvalue, This happens because add_predicate_code() will set the pred->allows_non_lvalue when it encounters a SCRATCH rtx code which subsequently triggers the above genrecog warning. Graham gcc/ChangeLog * gensupport.c (add_predicate_code): Also exclude SCRATCH from rtx codes which allow non-lvalues. My question would be in what context does it make sense to use a SCRATCH as a destination? Do you have a multi-output insn where you need to allocate a reg for one of the outputs, but you never use that output value? Or is it something totally different? SCRATCH as an rtx code is painfully under-documented. Though I think treating it just like a REG in add_predicate_code probably makes sense. Approved. Graham, do you still have write access to the repo? jeff
Re: [google][4_7] Function reordering plugin enhancements
Comments inline below. This patch brings the following to the linker function reordering plugin present in gcc-4_7 * Node profiles: Callgraph node profiles from the compiler are passed to the linker plugin. The node profiles are passed as bb entry count and max count of the corresponding function. The entry count of the split cold function is also passed when present. It is the max bb count of the split cold function, not the entry count. * With this patch, the plugin will sorts all sections that are not grouped by the plugin's callgraph according to their node weights. * New flags to the plugin to control the following: a) sort_name_prefix=yes|no: This is off by default. When this is on, the plugin groups sections by their section name prefix. b) use_maxcount=yes|no: This is on by default. This uses the max of max_count and the node weights as the actual node weight of a function. When this is off, the entry count is used as the node weight. c) edge_cutoff=a|pvalue: This can used to prune away cold callgraph edges from the linker plugin constructed callgraph. It can be expressed as a percent of the max edge value, ex: p25 for 25% or an absolute value, ex: a15000. The default is to consider all edges to be in the callgraph. d) unlikely_segment_cutoff=value: This decides the profile threshold below which functions should be considered unlikely. The default is zero. This is useful when splitting unlikely functions into a separate ELF segment using the gold linker. Handling split cold functions in the plugin will be done as a follow-up patch. Index: function_reordering_plugin/callgraph.c === --- function_reordering_plugin/callgraph.c (revision 198081) +++ function_reordering_plugin/callgraph.c (working copy) @@ -144,14 +144,18 @@ const int NUM_FUNCTIONS = 100; /* Reads off the next string from the char stream CONTENTS and updates READ_LENGTH to the length of the string read. The value of CONTENTS - is updated to start at the next string. 
*/ + is updated to start at the next string. UPDATE_CONTENTS tells if + CONTENTS must be moved past the read string to the next string. To + peek at the string, UPDATE_CONTENTS can be set to false. */ static char * -get_next_string (char **contents, unsigned int *read_length) +get_next_string (char **contents, unsigned int *read_length, + int update_contents) Does the plugin have access to type bool? { char *s = *contents; *read_length = strlen (*contents) + 1; - *contents += *read_length; + if (update_contents) +*contents += *read_length; return s; } @@ -192,7 +196,7 @@ remove_edge_from_list (Edge * curr_edge) /* Adds the WEIGHT value to the edge count of CALLER and CALLEE. */ static void -update_edge (Node *n1, Node *n2, unsigned int weight) +update_edge (Node *n1, Node *n2, unsigned long long weight) { void **slot; Raw_edge re, *r; @@ -227,6 +231,9 @@ static void e = *slot; e-weight += weight; } + /* Update the computed_weight, the computed node weight, of n2 which is the + sum of weights of all incoming edges to n2. */ Comment would read clearer as something like Update the computed node weight for n2, which is the sum of its incoming edge weights. + n2-computed_weight += weight; } /* Create a unique node for a function. */ @@ -288,10 +295,14 @@ void dump_edges (FILE *fp) it != NULL; it = it-next) { - fprintf (fp,# %s (%u) %s\n, + fprintf (fp,# %s (%llu, %llu) (%llu) %s (%llu, %llu)\n, it-first_function-name, + it-first_function-weight, + it-first_function-computed_weight, it-weight, - it-second_function-name); + it-second_function-name, + it-second_function-weight, + it-second_function-computed_weight); } } @@ -320,6 +331,8 @@ canonicalize_function_name (void *file_handle, cha call graph edges with appropriate weights. 
The section contents have the following format : Function caller_name + Weight entry_count max_count (optional line) + ColdWeight max_count (optional line) callee_1 edge count between caller and callee_1 callee_2 @@ -332,30 +345,85 @@ parse_callgraph_section_contents (void *file_handl { char *contents; char *caller; + char *node_weight_s = NULL; unsigned int read_length = 0, curr_length = 0; Node *caller_node; /* HEADER_LEN is the length of string 'Function '. */ const int HEADER_LEN = 9; - /* First string in contents is 'Function function-name'. */ + /* Prefix of line containing node weights. */ + const char *NODE_WEIGHT_PREFIX = Weight ; + /* Prefix of line containing max bb count of cold split part. */ + const char *SPLIT_FUNCTION_PREFIX = ColdWeight ; + + /* First string in contents is
[gomp4] C FE OpenMP 4.0 parsing stuff
Hi! This patch brings the C FE roughly on feature parity with what has been done earlier to the C++ FE only. Thus, #pragma omp simd should work with C FE now, etc. 2013-04-24 Jakub Jelinek ja...@redhat.com c/ * c-parser.c (c_parser_compound_statement, c_parser_statement): Adjust comments for OpenMP 3.0+ additions. (c_parser_pragma): Handle PRAGMA_OMP_CANCEL and PRAGMA_OMP_CANCELLATION_POINT. (c_parser_omp_clause_name): Handle new OpenMP 4.0 clauses. (c_parser_omp_clause_collapse): Fully fold collapse expression. (c_parser_omp_clause_branch, c_parser_omp_clause_cancelkind, c_parser_omp_clause_num_teams, c_parser_omp_clause_aligned, c_parser_omp_clause_linear, c_parser_omp_clause_safelen, c_parser_omp_clause_simdlen, c_parser_omp_clause_depend, c_parser_omp_clause_map, c_parser_omp_clause_device, c_parser_omp_clause_dist_schedule, c_parser_omp_clause_proc_bind, c_parser_omp_clause_to, c_parser_omp_clause_from, c_parser_omp_clause_uniform): New functions. (c_parser_omp_all_clauses): Handle new OpenMP 4.0 clauses. (c_parser_omp_for_loop): Add CODE argument, pass it through to c_finish_omp_for. (OMP_SIMD_CLAUSE_MASK): Define. (c_parser_omp_simd): New function. (c_parser_omp_for): Parse #pragma omp for simd. (OMP_PARALLEL_CLAUSE_MASK): Add OMP_CLAUSE_PROC_BIND. (c_parser_omp_parallel): Parse #pragma omp parallel for simd. (OMP_TASK_CLAUSE_MASK): Add OMP_CLAUSE_DEPEND. (c_parser_omp_taskgroup): New function. (OMP_CANCEL_CLAUSE_MASK, OMP_CANCELLATION_POINT_CLAUSE_MASK): Define. (c_parser_omp_cancel, c_parser_omp_cancellation_point): New functions. (c_parser_omp_construct): Handle PRAGMA_OMP_SIMD and PRAGMA_OMP_TASKGROUP. (c_parser_transaction_cancel): Formatting fix. * c-tree.h (c_begin_omp_taskgroup, c_finish_omp_taskgroup, c_finish_omp_cancel, c_finish_omp_cancellation_point): New prototypes. * c-typeck.c (c_begin_omp_taskgroup, c_finish_omp_taskgroup, c_finish_omp_cancel, c_finish_omp_cancellation_point): New functions. 
(c_finish_omp_clauses): Handle new OpenMP 4.0 clauses. cp/ * parser.c (cp_parser_omp_clause_name): Add missing break after case 'i'. (cp_parser_omp_cancellation_point): Diagnose error if #pragma omp cancellation isn't followed by point. * semantics.c (finish_omp_clauses): Complain also about zero in alignment of aligned directive or safelen/simdlen expressions. (finish_omp_cancel): Fix up diagnostics wording. testsuite/ * c-c++-common/gomp/simd1.c: Enable also for C. * c-c++-common/gomp/simd2.c: Likewise. * c-c++-common/gomp/simd3.c: Likewise. * c-c++-common/gomp/simd4.c: Likewise. Adjust expected diagnostics for C. * c-c++-common/gomp/simd5.c: Enable also for C. --- gcc/c/c-parser.c.jj 2013-03-27 13:01:09.0 +0100 +++ gcc/c/c-parser.c2013-04-24 18:22:37.195711949 +0200 @@ -1186,6 +1186,8 @@ static void c_parser_omp_barrier (c_pars static void c_parser_omp_flush (c_parser *); static void c_parser_omp_taskwait (c_parser *); static void c_parser_omp_taskyield (c_parser *); +static void c_parser_omp_cancel (c_parser *); +static void c_parser_omp_cancellation_point (c_parser *); enum pragma_context { pragma_external, pragma_stmt, pragma_compound }; static bool c_parser_pragma (c_parser *, enum pragma_context); @@ -4054,7 +4056,11 @@ c_parser_initval (c_parser *parser, stru openmp-directive: barrier-directive - flush-directive */ + flush-directive + taskwait-directive + taskyield-directive + cancel-directive + cancellation-point-directive */ static tree c_parser_compound_statement (c_parser *parser) @@ -4384,9 +4390,12 @@ c_parser_label (c_parser *parser) openmp-construct: parallel-construct for-construct + simd-construct + for-simd-construct sections-construct single-construct parallel-for-construct + parallel-for-simd-construct parallel-sections-construct master-construct critical-construct @@ -4399,6 +4408,12 @@ c_parser_label (c_parser *parser) for-construct: for-directive iteration-statement + simd-construct: + simd-directive iteration-statements + + 
for-simd-construct: + for-simd-directive iteration-statements + sections-construct: sections-directive section-scope @@ -4408,6 +4423,9 @@ c_parser_label (c_parser *parser) parallel-for-construct: parallel-for-directive iteration-statement + parallel-for-simd-construct: + parallel-for-simd-directive iteration-statement + parallel-sections-construct: parallel-sections-directive section-scope @@ -8606,6 +8624,28 @@ c_parser_pragma (c_parser *parser, enum
[patch] libstdc++/56905 deprecate copy_exception
The C++0x draft defined std::copy_exception but it was renamed to std::make_exception_ptr in the final C++11 standard. This changes the library to use the new name and deprecates the old one, so we can remove it one day. PR libstdc++/56905 * libsupc++/exception_ptr.h (copy_exception): Deprecate and move implementation to make_exception_ptr. * include/std/future (_State_base::_M_break_promise): Replace copy_exception with make_exception_ptr. * testsuite/18_support/exception_ptr/move.cc: Likewise. * testsuite/18_support/exception_ptr/rethrow_exception.cc: Likewise. * testsuite/30_threads/future/members/get2.cc: Likewise. * testsuite/30_threads/promise/members/set_exception.cc: Likewise. * testsuite/30_threads/promise/members/set_exception2.cc: Likewise. * testsuite/30_threads/promise/members/set_value2.cc: Likewise. * testsuite/30_threads/shared_future/members/get2.cc: Likewise. Tested x86_64-linux, committed to trunk. commit d6085858a53401f853913d9e58337e4a467a7460 Author: Jonathan Wakely jwakely@gmail.com Date: Wed Apr 24 22:40:11 2013 +0100 PR libstdc++/56905 * libsupc++/exception_ptr.h (copy_exception): Deprecate and move implementation to make_exception_ptr. * include/std/future (_State_base::_M_break_promise): Replace copy_exception with make_exception_ptr. * testsuite/18_support/exception_ptr/move.cc: Likewise. * testsuite/18_support/exception_ptr/rethrow_exception.cc: Likewise. * testsuite/30_threads/future/members/get2.cc: Likewise. * testsuite/30_threads/promise/members/set_exception.cc: Likewise. * testsuite/30_threads/promise/members/set_exception2.cc: Likewise. * testsuite/30_threads/promise/members/set_value2.cc: Likewise. * testsuite/30_threads/shared_future/members/get2.cc: Likewise. 
diff --git a/libstdc++-v3/include/std/future b/libstdc++-v3/include/std/future index a86a7ca..6d6b32b 100644 --- a/libstdc++-v3/include/std/future +++ b/libstdc++-v3/include/std/future @@ -361,7 +361,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (static_castbool(__res)) { error_code __ec(make_error_code(future_errc::broken_promise)); - __res-_M_error = copy_exception(future_error(__ec)); + __res-_M_error = make_exception_ptr(future_error(__ec)); { lock_guardmutex __lock(_M_mutex); _M_result.swap(__res); diff --git a/libstdc++-v3/libsupc++/exception_ptr.h b/libstdc++-v3/libsupc++/exception_ptr.h index bbf8f85..effab347 100644 --- a/libstdc++-v3/libsupc++/exception_ptr.h +++ b/libstdc++-v3/libsupc++/exception_ptr.h @@ -166,7 +166,7 @@ namespace std /// Obtain an exception_ptr pointing to a copy of the supplied object. templatetypename _Ex exception_ptr -copy_exception(_Ex __ex) _GLIBCXX_USE_NOEXCEPT +make_exception_ptr(_Ex __ex) _GLIBCXX_USE_NOEXCEPT { __try { @@ -183,10 +183,15 @@ namespace std // _GLIBCXX_RESOLVE_LIB_DEFECTS // 1130. copy_exception name misleading /// Obtain an exception_ptr pointing to a copy of the supplied object. + /// This function is deprecated, use std::make_exception_ptr instead. 
template<typename _Ex> -exception_ptr -make_exception_ptr(_Ex __ex) _GLIBCXX_USE_NOEXCEPT -{ return std::copy_exception<_Ex>(__ex); } +exception_ptr +copy_exception(_Ex __ex) _GLIBCXX_USE_NOEXCEPT _GLIBCXX_DEPRECATED; + + template<typename _Ex> +exception_ptr +copy_exception(_Ex __ex) _GLIBCXX_USE_NOEXCEPT +{ return std::make_exception_ptr<_Ex>(__ex); } // @} group exceptions } // namespace std diff --git a/libstdc++-v3/testsuite/18_support/exception_ptr/move.cc b/libstdc++-v3/testsuite/18_support/exception_ptr/move.cc index 2b7284d..9c3df1e 100644 --- a/libstdc++-v3/testsuite/18_support/exception_ptr/move.cc +++ b/libstdc++-v3/testsuite/18_support/exception_ptr/move.cc @@ -28,7 +28,7 @@ void test01() { bool test = true; - std::exception_ptr p1 = std::copy_exception(test); + std::exception_ptr p1 = std::make_exception_ptr(test); std::exception_ptr p2 = std::move(p1); VERIFY( p1 == 0 ); VERIFY( !(p2 == 0) ); diff --git a/libstdc++-v3/testsuite/18_support/exception_ptr/rethrow_exception.cc b/libstdc++-v3/testsuite/18_support/exception_ptr/rethrow_exception.cc index 430913c..39a57fe 100644 --- a/libstdc++-v3/testsuite/18_support/exception_ptr/rethrow_exception.cc +++ b/libstdc++-v3/testsuite/18_support/exception_ptr/rethrow_exception.cc @@ -34,7 +34,7 @@ void test01() using namespace std; try { -rethrow_exception(copy_exception(0)); +rethrow_exception(make_exception_ptr(0)); } catch(...) { } } @@ -45,7 +45,7 @@ void test02() using namespace std; try { -rethrow_exception(copy_exception(runtime_error("test"))); +rethrow_exception(make_exception_ptr(runtime_error("test"))); } catch(exception
Re: [patch] libstdc++/56905 deprecate copy_exception
On 24 April 2013 23:00, Jonathan Wakely wrote: The C++0x draft defined std::copy_exception but it was renamed to std::make_exception_ptr in the final C++11 standard. This changes the library to use the new name and deprecates the old one, so we can remove it one day. PR libstdc++/56905 * libsupc++/exception_ptr.h (copy_exception): Deprecate and move implementation to make_exception_ptr. * include/std/future (_State_base::_M_break_promise): Replace copy_exception with make_exception_ptr. * testsuite/18_support/exception_ptr/move.cc: Likewise. * testsuite/18_support/exception_ptr/rethrow_exception.cc: Likewise. * testsuite/30_threads/future/members/get2.cc: Likewise. * testsuite/30_threads/promise/members/set_exception.cc: Likewise. * testsuite/30_threads/promise/members/set_exception2.cc: Likewise. * testsuite/30_threads/promise/members/set_value2.cc: Likewise. * testsuite/30_threads/shared_future/members/get2.cc: Likewise. Tested x86_64-linux, committed to trunk. Here's the wwwdocs patch for the release notes. Index: htdocs/gcc-4.9/changes.html === RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.9/changes.html,v retrieving revision 1.7 diff -u -r1.7 changes.html --- htdocs/gcc-4.9/changes.html 24 Apr 2013 16:21:39 - 1.7 +++ htdocs/gcc-4.9/changes.html 24 Apr 2013 22:03:37 - @@ -88,6 +88,15 @@ /li /ul + h4Runtime Library (libstdc++)/h4 + + ul +liThe non-standard function codestd::copy_exception/code has been deprecated +and will be removed in a future version. codestd::make_exception_ptr/code +should be used instead. +/li + /ul + h3 id=fortranFortran/h3 ul liCompatibility notice:
Re: patch for latest lra changes.
On 13-04-24 3:45 PM, Paolo Carlini wrote: Hi, On 04/24/2013 05:49 PM, Vladimir Makarov wrote: The following patch incorporates some LRA changes on lra and mike-lra branches. I am committing them to get a better testing of them for x86/x86-64 on trunk. Just in case nobody reported it already, when the patch went in these regressions appeared on x86_64-linux: http://gcc.gnu.org/ml/gcc-regression/2013-04/msg00344.html I can not fix it quickly. So I reverted the patch. Thanks.
[patch] Fix non-deterministic output order for type stubs in DWARF output
Jason, would you like to take a look at this patch before I commit it? If I turn on -fdebug-types-section, I get a random failure in g++.dg/pch/system-2.C. This failure is being caused by non-deterministic order of the type stubs emitted by optimize_external_refs, because it's using the address of the DIE as a hash for the extern_map hash table. This patch makes the output order deterministic by using either the die_symbol (as a string) or the type signature for the hash value. Bootstrapped on x86_64 with no regressions. (I'm not planning to turn on -fdebug-types-section by default; I'm just trying to make sure the test suite is clean with it on so that I can turn it on in the google branches.) -cary 2013-04-24 Cary Coutant ccout...@google.com gcc/ * dwarf2out.c (hash_external_ref): Use die_symbol or signature for hash so that hash table traversal order is deterministic. Index: gcc/dwarf2out.c === --- gcc/dwarf2out.c (revision 198260) +++ gcc/dwarf2out.c (working copy) @@ -7385,7 +7385,22 @@ static hashval_t hash_external_ref (const void *p) { const struct external_ref *r = (const struct external_ref *)p; - return htab_hash_pointer (r->type); + dw_die_ref die = r->type; + hashval_t h = 0; + + /* We can't use the address of the DIE for hashing, because + that will make the order of the stub DIEs non-deterministic. */ + if (! die->comdat_type_p) +/* We have a symbol; use it to compute a hash. */ +h = htab_hash_string (die->die_id.die_symbol); + else +{ + /* We have a type signature; use a subset of the bits as the hash. + The 8-byte signature is at least as large as hashval_t. */ + comdat_type_node_ref type_node = die->die_id.die_type_node; + memcpy (&h, type_node->signature, sizeof (h)); +} + return h; } /* Compare external_refs. */
Re: [patch rfa] Fix PCH test failure when -fdebug-types-section is enabled
Generated code should not depend on actual values of pointers - which often happens when you traverse a hashtable hashing pointers. Is this what is done here? Then I suggest to hash on something different or sort after sth different before outputting. That was it, thanks! We were emitting type stub DIEs via a hash table traversal where the address of the DIE was being used as the hash. -cary
Re: Minimize downward code motion during reassociation
I want to resend this patch for consideration. I applied the patch to trunk and confirmed that it bootstraps and doesn't cause test regressions. Is this ok for trunk? Thanks, Easwaran On Fri, Dec 7, 2012 at 12:01 PM, Easwaran Raman era...@google.com wrote: It seems I need to reset the debug uses of a statement before moving the statement itself. The attached patch starts from the leaf to root of the tree to be reassociated and places them at the point where their dependences will be met after reassociation. This bootstraps and I am running the tests. Ok if there are no test failures? Thanks, Easwaran 2012-12-07 Easwaran Raman era...@google.com * tree-ssa-reassoc.c(find_insert_point): New function. (insert_stmt_after): Likewise. (get_def_stmt): Likewise. (ensure_ops_are_available): Likewise. (rewrite_expr_tree): Do not move statements beyond what is necessary. Remove call to swap_ops_for_binary_stmt... (reassociate_bb): ... and move it here. (build_and_add_sum): Assign UIDs for new statements. (linearize_expr): Likewise. (do_reassoc): Renumber gimple statement UIDs. On Thu, Dec 6, 2012 at 1:10 AM, Richard Biener richard.guent...@gmail.com wrote: On Tue, Nov 6, 2012 at 1:54 AM, Easwaran Raman era...@google.com wrote: I am unable to figure out the right way to handle the debug statements. What I tried was to find debug statements that use the SSA name defined by the statement I moved (using SSA_NAME_IMM_USE_NODE) and then moved them as well at the right place. Thus, if I have to move t1 = a + b down (after the definition of 'd'), I also moved all debug statements that use t1 after the new position of t1. That still caused use-before-def problems in ssa_verify. I noticed that the debug statements got modified behind the scenes causing these issues. Any hints on what is the right way to handle the debug statements would be very helpful. I think you cannot (and should not) move debug statements. Instead you have to invalidate them. 
Otherwise you'll introduce confusion as debug info cannot handle overlapping live ranges. But maybe Alex can clarify. Richard.
Re: patch to fix constant math -5th patch, rtl
On 04/24/2013 11:13 AM, Richard Biener wrote: On Wed, Apr 24, 2013 at 5:00 PM, Richard Sandiford rdsandif...@googlemail.com wrote: Richard Bienerrichard.guent...@gmail.com writes: On Wed, Apr 24, 2013 at 4:29 PM, Richard Sandiford rdsandif...@googlemail.com wrote: In other words, one of the reasons wide_int can't be exactly 1:1 in practice is because it is clearing out these mistakes (GEN_INT rather than gen_int_mode) and missing features (non-power-of-2 widths). Note that the argument should be about CONST_WIDE_INT here, not wide-int. Indeed CONST_WIDE_INT has the desired feature and can be properly truncated/extended according to mode at the time we build it via immed_wide_int_cst (w, mode). I don't see the requirement that wide-int itself is automagically providing that truncation/extension (though it is a possibility, one that does not match existing behavior of HWI for CONST_INT or double-int for CONST_DOUBLE). I agree it doesn't match the existing behaviour of HWI for CONST_INT or double-int for CONST_DOUBLE, but I think that's very much a good thing. The model for HWIs at the moment is that you have to truncate results to the canonical form after every operation where it matters. As you proved in your earlier message about the plus_constant bug, that's easily forgotten. I don't think the rtl code is doing all CONST_INT arithmetic on full HWIs because it wants to: it's doing it because that's the way C/C++ arithmetic on primitive types works. In other words, the current CONST_INT code is trying to emulate N-bit arithmetic (for gcc runtime N) using a single primitive integer type. wide_int gives us N-bit arithmetic directly; no emulation is needed. Ok, so what wide-int provides is integer values encoded in 'len' HWI words that fit in 'precision' or more bits (and often in less). wide-int also provides N-bit arithmetic operations. IMHO both are tied too closely together. A give constant doesn't really have a precision. 
Associating one with it to give a precision to an arithmetic operation looks wrong to me and are a source of mismatches. What RTL currently has looks better to me - operations have explicitely specified precisions. I have tried very hard to make wide-int work very efficiently with both tree and rtl without biasing the rep towards either representation. Both rtl and trees constants have a precision. In tree, constants are done better than in rtl because the tree really does have a field that is filled in that points to a type. However, that does not mean that rtl constants do not have a precision: currently you have to look around at the context to find the mode of a constant that is in your hand, but it is in fact always there. At the rtl level, you can see the entire patch - we always find an appropriate mode. In the future, this may change. Wide-int moves one step closer in that ports that support it will not expect that double-ints never have a mode. But that is a long way from having the mode attached. What is not stored with the constant is a indication of the signedness. Unlike a desire to add modes to rtl constants, there is no one even thinking about the sign. The sign is implicit in the operator, just as it is at the tree level. So when i designed wide-int, i assumed that i could get precisions from the variables or at least close to them. As far as the question of infinite precision, 99% of the uses of double-int today are get in, do a single operation and get out. If this is all that we plan to do, then it does not really matter if it is infinite precision or not, because at both the rtl and tree level, we truncate on the way out. However, the use of double-int accounts for only a small percentage of the math done in the compiler. My wide-int port converts a substantial portion of the math from inline code that is guarded by checks to the precision against HOST_WIDE_BITS_PER_INT or calls to host_integerp. 
The conversion of this code has substantial potential to expose the differences between the fixed precision and infinite precision representations. The only justification that you have ever given for wanting to use infinite precision is that it is cleaner. You have never directly addressed my point that it gives surprising answers except to say that the user would have to put in explicit intermediate truncations.It is hard for me to imaging buggering up something as bad as having to put in explicit intermediate truncations. When i write a * b / c, it should really look something like the expression. If your point is that an arbitrary-precision wide_int could be used by other (non-rtl, and probably non-tree) clients, then I don't really see the need. We already have mpz_t for that. What we don't have, and what we IMO need, is something that performs N-bit arithmetic for runtime N. It seems better to have a single class that does that for us (wide_int), rather than scatter N-bit
Re: [gomp4] Some progress on #pragma omp simd
[Balaji, see below]. Ok, this is confusing. While the document in the link you posted (the ICC manual?) says so, the document I'm following says otherwise. I'm following this (which, until a few days was a link accessible from the cilk plus web page, though I no longer see it): http://software.intel.com/sites/default/files/m/4/e/7/3/1/40297-Intel_Cilk_plus_lang_spec_2.htm The document above is for version 1.1 of the Cilk Plus language extension specification, which I was told was the latest. There it explicitly says that the clauses behave exactly like in OpenMP: The syntax and semantics of the various simd-openmp-data-clauses are detailed in the OpenMP specification. (http://www.openmp.org/mp-documents/spec30.pdf, Section 2.9.3). Balaji, can you verify which is correct? For that matter, which are the official specs from which we should be basing this work? Aldy On 04/24/13 01:40, Jakub Jelinek wrote: On Wed, Apr 24, 2013 at 08:25:36AM +0200, Jakub Jelinek wrote: BTW, the semantics of private/firstprivate/lastprivate desribed in http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/cref_cls/common/cppref_pragma_simd.htm doesn't seem to match the semantics of those in #pragma omp simd. private in OpenMP I understand is private to the whole loop (or SIMD lane?; SIMD lane apparently. Guess that is going to be quite difficult, because at the point of omp lowering or expansion we are nowhere close to knowing what vectorization factor we are going to choose, all we have is an upper bound on that based on the target ISA and safelen clause. If say private clause is used with C++ classes with non-trivial ctors/dtors that would make a difference. Plus how to represent this in the IL. 
struct A { A (); ~A (); A (const A ); int i; }; void foo () { A a, b; #pragma omp simd private (a) lastprivate (b) for (int i = 0; i 10; i++) { a.i++; b.i++; } } Right now what gomp4 branch does is that it will just construct private vars around the whole loop, as in: void foo () { A a, b; { A a', b'; int i; for (i = 0; i 10; i++) { a'.i++; b'.i++; if (i == 9) b = b'; } } } Jakub
Re: [gomp4] Some progress on #pragma omp simd
On 04/24/13 18:22, Aldy Hernandez wrote: Hmmm, furthermore, even if the simd + private semantics in Cilk Plus have the same semantics of the OpenMP standard, is it the OpenMP 3.0 semantics like the openmp link suggests (private to task), or is it the OpenMP 4.0 rc 2 semantics which you suggest (private to SIMD lane)? Lemme ask icc. [Balaji, see below]. Ok, this is confusing. While the document in the link you posted (the ICC manual?) says so, the document I'm following says otherwise. I'm following this (which, until a few days was a link accessible from the cilk plus web page, though I no longer see it): http://software.intel.com/sites/default/files/m/4/e/7/3/1/40297-Intel_Cilk_plus_lang_spec_2.htm The document above is for version 1.1 of the Cilk Plus language extension specification, which I was told was the latest. There it explicitly says that the clauses behave exactly like in OpenMP: The syntax and semantics of the various simd-openmp-data-clauses are detailed in the OpenMP specification. (http://www.openmp.org/mp-documents/spec30.pdf, Section 2.9.3). Balaji, can you verify which is correct? For that matter, which are the official specs from which we should be basing this work? Aldy On 04/24/13 01:40, Jakub Jelinek wrote: On Wed, Apr 24, 2013 at 08:25:36AM +0200, Jakub Jelinek wrote: BTW, the semantics of private/firstprivate/lastprivate desribed in http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/cref_cls/common/cppref_pragma_simd.htm doesn't seem to match the semantics of those in #pragma omp simd. private in OpenMP I understand is private to the whole loop (or SIMD lane?; SIMD lane apparently. Guess that is going to be quite difficult, because at the point of omp lowering or expansion we are nowhere close to knowing what vectorization factor we are going to choose, all we have is an upper bound on that based on the target ISA and safelen clause. 
If say private clause is used with C++ classes with non-trivial ctors/dtors that would make a difference. Plus how to represent this in the IL. struct A { A (); ~A (); A (const A ); int i; }; void foo () { A a, b; #pragma omp simd private (a) lastprivate (b) for (int i = 0; i 10; i++) { a.i++; b.i++; } } Right now what gomp4 branch does is that it will just construct private vars around the whole loop, as in: void foo () { A a, b; { A a', b'; int i; for (i = 0; i 10; i++) { a'.i++; b'.i++; if (i == 9) b = b'; } } } Jakub
Re: [gomp4] Some progress on #pragma omp simd
On 04/24/13 01:01, Jakub Jelinek wrote: The documentation doesn't suggest that. Anyway, so #pragma simd should be equivalent to #pragma omp simd wrt. inter-iteration dependencies, and #pragma simd vectorlength(a, b, c) to #pragma omp simd safelen(max (a, b, c)) ? If so, then the FE could emit OMP_SIMD for #pragma simd, and if vectorlength is present, add OMP_CLAUSE_SAFELEN with the maximum of the values in all vectorlength clauses, and keep the vectorlength clauses around too as CILK_CLAUSE_VECTORLENGTH as hints to the vectorizer? I can make these changes to the Cilk Plus code. Also, Aldy said that #pragma simd loops allow != condition, how do you compute number of iterations in that case if the increment isn't constant? As conditional depending on whether increment is positive or negative? != condition isn't allowed in OpenMP, so there it is always obvious which direction it should iterate, and the expansion code will assume if it sees NE_EXPR that it is just folded border test (comparison with maximum or minimum value). Perhaps it is an oversight in the documentation, because icc disallows both == and !=. Disallowing != makes it a lot easier :).
[Patch][google/gcc-4_8] Backport r198101 (fix for PR56847) into google/gcc-4_8
Hi, this patch back port trunk@198101 to fix PR rtl-optimization/56847. Passed bootstrap and regression test. Ok for branch google/gcc-4_8? 2013-04-19 Vladimir Makarov vmaka...@redhat.com PR rtl-optimization/56847 * lra-constraints.c (process_alt_operands): Discourage alternative with non-matched offsettable memory constraint for memory with known offset. diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index e3b4add..9e4924c 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1978,8 +1978,15 @@ process_alt_operands (int only_alternative) (op, this_alternative) == NO_REGS reject += LRA_MAX_REJECT; - if (! ((const_to_mem && constmemok) - || (MEM_P (op) && offmemok))) + if (MEM_P (op) && offmemok) + { + /* If we know offset and this non-offsetable memory, + something wrong with this memory and it is better + to try other memory possibilities. */ + if (MEM_OFFSET_KNOWN_P (op)) +reject += LRA_MAX_REJECT; + } + else if (! (const_to_mem && constmemok)) { /* We prefer to reload pseudos over reloading other things, since such reloads may be able to be diff --git a/gcc/testsuite/gcc.dg/pr56847.c b/gcc/testsuite/gcc.dg/pr56847.c new file mode 100644 index 000..b94aae1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr56847.c @@ -0,0 +1,12 @@ +/* PR rtl-optimization/56847 */ +/* { dg-do compile { target pie } } */ +/* { dg-options "-O2 -fpie" } */ + +struct S { long int a, b; } e; +__thread struct S s; + +void +foo (void) +{ + s = e; +} H.
Re: [Patch][google/gcc-4_8] Backport r198101 (fix for PR56847) into google/gcc-4_8
Looks good. Dehao On Wed, Apr 24, 2013 at 5:49 PM, Han Shen(沈涵) shen...@google.com wrote: Hi, this patch back port trunk@198101 to fix PR rtl-optimization/56847. Passed bootstrap and regression test. Ok for branch google/gcc-4_8? 2013-04-19 Vladimir Makarov vmaka...@redhat.com PR rtl-optimization/56847 * lra-constraints.c (process_alt_operands): Discourage alternative with non-matche doffsettable memory constraint fro memory with known offset. diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index e3b4add..9e4924c 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1978,8 +1978,15 @@ process_alt_operands (int only_alternative) (op, this_alternative) == NO_REGS reject += LRA_MAX_REJECT; - if (! ((const_to_mem constmemok) - || (MEM_P (op) offmemok))) + if (MEM_P (op) offmemok) + { + /* If we know offset and this non-offsetable memory, + something wrong with this memory and it is better + to try other memory possibilities. */ + if (MEM_OFFSET_KNOWN_P (op)) +reject += LRA_MAX_REJECT; + } + else if (! (const_to_mem constmemok)) { /* We prefer to reload pseudos over reloading other things, since such reloads may be able to be diff --git a/gcc/testsuite/gcc.dg/pr56847.c b/gcc/testsuite/gcc.dg/pr56847.c new file mode 100644 index 000..b94aae1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr56847.c @@ -0,0 +1,12 @@ +/* PR rtl-optimization/56847 */ +/* { dg-do compile { target pie } } */ +/* { dg-options -O2 -fpie } */ + +struct S { long int a, b; } e; +__thread struct S s; + +void +foo (void) +{ + s = e; +} H.
Re: [gomp4] Some progress on #pragma omp simd
On Tue, Apr 23, 2013 at 09:32:29PM +, Iyer, Balaji V wrote: My apologies if the documentation did not explain this correctly. It was written by compiler developers and not language developers. #pragma simd is the guarantee the user gives the compiler that the inter-iteration dependencies do not matter. So, if the user omits the vectorlength the clause then the compiler can, in effect, choose N, where N is the number of loop iterations. The documentation doesn't suggest that. Anyway, so #pragma simd should be equivalent to #pragma omp simd wrt. inter-iteration dependencies, and #pragma simd vectorlength(a, b, c) to #pragma omp simd safelen(max (a, b, c)) ? If so, then the FE could emit OMP_SIMD for #pragma simd, and if vectorlength is present, add OMP_CLAUSE_SAFELEN with the maximum of the values in all vectorlength clauses, and keep the vectorlength clauses around too as CILK_CLAUSE_VECTORLENGTH as hints to the vectorizer? Also, Aldy said that #pragma simd loops allow != condition, how do you compute number of iterations in that case if the increment isn't constant? As conditional depending on whether increment is positive or negative? != condition isn't allowed in OpenMP, so there it is always obvious which direction it should iterate, and the expansion code will assume if it sees NE_EXPR that it is just folded border test (comparison with maximum or minimum value). Jakub
Re: [gomp4] Some progress on #pragma omp simd
On Wed, Apr 24, 2013 at 08:01:17AM +0200, Jakub Jelinek wrote: On Tue, Apr 23, 2013 at 09:32:29PM +, Iyer, Balaji V wrote: The documentation doesn't suggest that. Anyway, so #pragma simd should be equivalent to #pragma omp simd wrt. inter-iteration dependencies, and #pragma simd vectorlength(a, b, c) to #pragma omp simd safelen(max (a, b, c)) ? If so, then the FE could emit OMP_SIMD for #pragma simd, and if vectorlength is present, add OMP_CLAUSE_SAFELEN with the maximum of the values in all vectorlength clauses, and keep the vectorlength clauses around too as CILK_CLAUSE_VECTORLENGTH as hints to the vectorizer? Also, Aldy said that #pragma simd loops allow != condition, how do you compute number of iterations in that case if the increment isn't constant? As conditional depending on whether increment is positive or negative? != condition isn't allowed in OpenMP, so there it is always obvious which direction it should iterate, and the expansion code will assume if it sees NE_EXPR that it is just folded border test (comparison with maximum or minimum value). BTW, the semantics of private/firstprivate/lastprivate desribed in http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/cref_cls/common/cppref_pragma_simd.htm doesn't seem to match the semantics of those in #pragma omp simd. private in OpenMP I understand is private to the whole loop (or SIMD lane?; at least, that was the semantics of #pragma omp for too and there is no wording to suggest otherwise for #pragma omp simd or #pragma omp for simd), while the above html suggests in Cilk+ it is private to each iteration. #pragma omp simd doesn't support firstprivate. The lastprivate semantics wrt. returning the last iteration's value is the same. Jakub
Re: [Patch] Add microMIPS jraddiusp support
Moore, Catherine catherine_mo...@mentor.com writes: @@ -11596,12 +11604,18 @@ mips_expand_epilogue (bool sibcall_p) rtx reg = gen_rtx_REG (Pmode, GP_REG_FIRST + 7); pat = gen_return_internal (reg); } + else if (use_jraddiusp_p) + { + pat = gen_jraddiusp (GEN_INT (step2)); + } Redundant braces, should be: else if (use_jraddiusp_p) pat = gen_jraddiusp (GEN_INT (step2)); OK with that change, thanks. Richard
Re: [patch][mips] split mips_reorg in pre- and post-dbr_schedule parts
Steven Bosscher stevenb@gmail.com writes: *ping* MIPS maintainers... Patch is OK. Sorry for the slow review, been on holiday. Richard
Re: [gomp4] Some progress on #pragma omp simd
On Wed, Apr 24, 2013 at 08:25:36AM +0200, Jakub Jelinek wrote: BTW, the semantics of private/firstprivate/lastprivate desribed in http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/cref_cls/common/cppref_pragma_simd.htm doesn't seem to match the semantics of those in #pragma omp simd. private in OpenMP I understand is private to the whole loop (or SIMD lane?; SIMD lane apparently. Guess that is going to be quite difficult, because at the point of omp lowering or expansion we are nowhere close to knowing what vectorization factor we are going to choose, all we have is an upper bound on that based on the target ISA and safelen clause. If say private clause is used with C++ classes with non-trivial ctors/dtors that would make a difference. Plus how to represent this in the IL. struct A { A (); ~A (); A (const A ); int i; }; void foo () { A a, b; #pragma omp simd private (a) lastprivate (b) for (int i = 0; i 10; i++) { a.i++; b.i++; } } Right now what gomp4 branch does is that it will just construct private vars around the whole loop, as in: void foo () { A a, b; { A a', b'; int i; for (i = 0; i 10; i++) { a'.i++; b'.i++; if (i == 9) b = b'; } } } Jakub
Re: [patch, mips] Fix for PR target/56942
Steve Ellcey sell...@imgtec.com writes: 2013-04-19 Andrew Bennett andrew.benn...@imgtec.com Steve Ellcey sell...@imgtec.com PR target/56942 * config/mips/mips.md (casesi_internal_mips16_mode): Use next_active_insn instead of next_real_insn. Hmm, I don't really like this. Steven said from ARM in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56809: --- Target bug, this is wrong: rtx diff_vec = PATTERN (next_real_insn (operands[2])); A jump_table_data is not a real insn. Before my patch this worked by accident because the jump table would hide in a JUMP_INSN and next_real_insn returned any JUMP_P insn. Use next_active_insn instead. --- But using next_real_insn was at least as correct (IMO, more correct) as next_active_insn before r197266. It seems counterintuitive that something can be active but not real. Richard
[PATCH] Maybe fix PR57050
This avoids sigjmp_buf use which maybe fixes newlib targets. Tested on x86_64-unknown-linux-gnu, applied. Richard. 2013-04-24 Richard Biener rguent...@suse.de PR testsuite/57050 * gcc.c-torture/execute/pr56982.c: Avoid sigjmp_buf use. Index: gcc/testsuite/gcc.c-torture/execute/pr56982.c === --- gcc/testsuite/gcc.c-torture/execute/pr56982.c (revision 198216) +++ gcc/testsuite/gcc.c-torture/execute/pr56982.c (working copy) @@ -1,8 +1,10 @@ -#include <stdlib.h> #include <setjmp.h> -static sigjmp_buf env; -void *stderr; +extern void abort (void); +extern void exit (int); + +static jmp_buf env; + void baz (void) { __asm__ volatile ("" : : : "memory");
Re: [PATCH, ARM] Fix PR56797
On 23/04/13 17:37, Greta Yorsh wrote: Ok to backport to gcc4.8? I'm attaching an updated version - just fixed a spelling error in the comment. Thanks, Greta gcc/ChangeLog PR target/56797 * config/arm/arm.c (load_multiple_sequence): Require SP as base register for loads if SP is in the register list. OK. R. -Original Message- From: Richard Earnshaw Sent: 19 April 2013 12:34 To: Greta Yorsh Cc: GCC Patches; raj.k...@gmail.com; Ramana Radhakrishnan Subject: Re: [PATCH, ARM] Fix PR56797 On 19/04/13 10:34, Greta Yorsh wrote: Fix PR56797 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56797 The problem is that peephole optimizer thinks it can generate an ldm, but the pattern for ldm no longer matches, because after r188738 it requires that if one of the destination registers is SP then the base register must be SP, and it's not SP in the test case. The test case fails on armv5t but doesn't fail on armv6t2 or armv7-a because peephole doesn't trigger there (because there is a different epilogue sequence). It looks like a latent problem for other architecture or CPUs. This patch adds this condition to the peephole optimizer. No regression on qemu for arm-none-eabi and fixes the test reported in the PR. I couldn't minimize the test sufficiently to include it in the testsuite. Ok for trunk? Thanks, Greta gcc/ 2013-04-18 Greta Yorsh greta.yo...@arm.com PR target/56797 * config/arm/arm.c (load_multiple_sequence): Require SP as base register for loads if SP is in the register list. OK. R. pr56797-ldm-peep-sp.patch.txt diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d00849c..60fef78 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -10347,6 +10347,13 @@ load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order, || (i != nops - 1 unsorted_regs[i] == base_reg)) return 0; + /* Don't allow SP to be loaded unless it is also the base + register. It guarantees that SP is reset correctly when + an LDM instruction is interrupted. 
Otherwise, we might + end up with a corrupt stack. */ + if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM) +return 0; + unsorted_offsets[i] = INTVAL (offset); if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) order[0] = i;
PR57052, rs6000.md subregs
Practically all of the patterns in rs6000.md having a low-part subreg use zero as the byte number. That's correct when little-endian, but not when big-endian, so combine loses optimization opportunities. This patch duplicates the insns for big-endian. I know Segher has been working on removing these subreg patterns, but I think he'll need to stop combine creating low-part subregs for that to work. So until his patch materializes, is this OK for mainline? Bootstrapped and regression tested powerpc-linux. PR target/57052 * config/rs6000/rs6000.md (rotlsi3_internal7): Rename to rotlsi3_internal7le and condition on !BYTES_BIG_ENDIAN. (rotlsi3_internal8be): New BYTES_BIG_ENDIAN insn. Repeat for many other rotate/shift and mask patterns using subregs. Name lshiftrt insns. (ashrdisi3_noppc64): Rename to ashrdisi3_noppc64be and condition on WORDS_BIG_ENDIAN. Index: gcc/config/rs6000/rs6000.md === --- gcc/config/rs6000/rs6000.md (revision 198174) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -3805,20 +3801,33 @@ (const_int 0)))] ) -(define_insn *rotlsi3_internal7 +(define_insn *rotlsi3_internal7le [(set (match_operand:SI 0 gpc_reg_operand =r) (zero_extend:SI (subreg:QI (rotate:SI (match_operand:SI 1 gpc_reg_operand r) (match_operand:SI 2 reg_or_cint_operand ri)) 0)))] - + !BYTES_BIG_ENDIAN rlw%I2nm %0,%1,%h2,0xff [(set (attr cell_micro) (if_then_else (match_operand:SI 2 const_int_operand ) (const_string not) (const_string always)))]) -(define_insn *rotlsi3_internal8 +(define_insn *rotlsi3_internal7be + [(set (match_operand:SI 0 gpc_reg_operand =r) + (zero_extend:SI +(subreg:QI + (rotate:SI (match_operand:SI 1 gpc_reg_operand r) +(match_operand:SI 2 reg_or_cint_operand ri)) 3)))] + BYTES_BIG_ENDIAN + rlw%I2nm %0,%1,%h2,0xff + [(set (attr cell_micro) + (if_then_else (match_operand:SI 2 const_int_operand ) + (const_string not) + (const_string always)))]) + +(define_insn *rotlsi3_internal8le [(set (match_operand:CC 0 cc_reg_operand =x,x,?y,?y) (compare:CC (zero_extend:SI 
(subreg:QI @@ -3826,7 +3835,7 @@ (match_operand:SI 2 reg_or_cint_operand r,i,r,i)) 0)) (const_int 0))) (clobber (match_scratch:SI 3 =r,r,r,r))] - + !BYTES_BIG_ENDIAN @ rlwnm. %3,%1,%2,0xff rlwinm. %3,%1,%h2,0xff @@ -3835,6 +3844,23 @@ [(set_attr type var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare) (set_attr length 4,4,8,8)]) +(define_insn *rotlsi3_internal8be + [(set (match_operand:CC 0 cc_reg_operand =x,x,?y,?y) + (compare:CC (zero_extend:SI +(subreg:QI + (rotate:SI (match_operand:SI 1 gpc_reg_operand r,r,r,r) +(match_operand:SI 2 reg_or_cint_operand r,i,r,i)) 3)) + (const_int 0))) + (clobber (match_scratch:SI 3 =r,r,r,r))] + BYTES_BIG_ENDIAN + @ + rlwnm. %3,%1,%2,0xff + rlwinm. %3,%1,%h2,0xff + # + # + [(set_attr type var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare) + (set_attr length 4,4,8,8)]) + (define_split [(set (match_operand:CC 0 cc_reg_not_micro_cr0_operand ) (compare:CC (zero_extend:SI @@ -3843,7 +3869,7 @@ (match_operand:SI 2 reg_or_cint_operand )) 0)) (const_int 0))) (clobber (match_scratch:SI 3 ))] - reload_completed + !BYTES_BIG_ENDIAN reload_completed [(set (match_dup 3) (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) @@ -3853,7 +3879,25 @@ (const_int 0)))] ) -(define_insn *rotlsi3_internal9 +(define_split + [(set (match_operand:CC 0 cc_reg_not_micro_cr0_operand ) + (compare:CC (zero_extend:SI +(subreg:QI + (rotate:SI (match_operand:SI 1 gpc_reg_operand ) +(match_operand:SI 2 reg_or_cint_operand )) 3)) + (const_int 0))) + (clobber (match_scratch:SI 3 ))] + BYTES_BIG_ENDIAN reload_completed + [(set (match_dup 3) + (zero_extend:SI (subreg:QI + (rotate:SI (match_dup 1) +(match_dup 2)) 3))) + (set (match_dup 0) + (compare:CC (match_dup 3) + (const_int 0)))] + ) + +(define_insn *rotlsi3_internal9le [(set (match_operand:CC 3 cc_reg_operand =x,x,?y,?y) (compare:CC (zero_extend:SI (subreg:QI @@ -3862,7 +3906,7 @@ (const_int 0))) (set (match_operand:SI 0 gpc_reg_operand =r,r,r,r) (zero_extend:SI (subreg:QI 
(rotate:SI (match_dup 1) (match_dup 2)) 0)))] - + !BYTES_BIG_ENDIAN @
PR 50686: dwarf2cfi fix for IRIX
Before the target was removed, IRIX defined DWARF_FRAME_RETURN_COLUMN to be reg 64, which is something other than INCOMING_RETURN_ADDR_RTX (reg 31). This means that when we see a save of reg 31, we discover that this is really a save of reg 64 and produce dwarf information accordingly. However, there is nothing that handles a restore of reg 31 specially, which means that reg 64 is unaffected by it. This is a problem when shrink-wrapping, if we end up merging two paths where one can arrive directly from the function start, and another where we've gone through a save/restore of reg 31. The paths will disagree on where reg 64 is saved. The following patch fixes it by handling restores of INCOMING_RETURN_ADDR_RTX specially if it is different from DWARF_FRAME_RETURN_COLUMN. Bootstrapped and tested on x86_64-linux, and also bootstrapped by Rainer Orth on IRIX with 4.7.3. Ok? Bernd * dwarf2cfi.c (dwarf2out_frame_debug_cfa_restore): When restoring INCOMING_RETURN_ADDR_RTX, and it is different from DWARF_FRAME_RETURN_COLUMN, mark the latter as saved in the former. Index: gcc/dwarf2cfi.c === --- gcc/dwarf2cfi.c (revision 189425) +++ gcc/dwarf2cfi.c (working copy) @@ -1240,6 +1240,10 @@ dwarf2out_frame_debug_cfa_restore (rtx r add_cfi_restore (regno); update_row_reg_save (cur_row, regno, NULL); + if (REG_P (INCOMING_RETURN_ADDR_RTX) + && regno == dwf_regno (INCOMING_RETURN_ADDR_RTX) + && regno != DWARF_FRAME_RETURN_COLUMN) + reg_save (DWARF_FRAME_RETURN_COLUMN, regno, 0); } /* A subroutine of dwarf2out_frame_debug, process a REG_CFA_WINDOW_SAVE.
Fix C++ testcases for size_t vs intptr_t
This fixes C++ testcases that used size_t when casting a pointer to integer so that they use intptr_t instead. There's also an testcase using pointer subtraction where ptrdiff_t is the correct choice, and a fix to the ptrmem.C testcase to use sizeof on a function pointer rather than a data pointer. A similar patch for C testcases was already applied last year. Bootstrapped and tested on x86_64-linux. Ok? Bernd gcc/testsuite/ * g++.old-deja/g++.mike/warn1.C (uintptr_t): Renamed from size_t. Use __UINTPTR_TYPE__. All uses changed. * g++.dg/other/offsetof3.C (uintptr_t): Likewise. * g++.dg/opt/switch3.C (uintptr_t): Likewise. * g++.dg/init/array11.C (uintptr_t): Likewise. * g++.dg/torture/pr41775.C (uintptr_t): Likewise. * g++.dg/tree-ssa/pr21082.C (b, c): Use __PTRDIFF_TYPE__. * g++.dg/debug/const2.C (b::d): Use __UINTPTR_TYPE__ instead of __SIZE_TYPE__. * g++.dg/parse/array-size2.C (foo): Likewise. * g++.dg/compat/struct-layout-1_x1.h (test##n): Likewise. * g++.dg/abi/offsetof.C (main): Likewise. * g++.dg/eh/alias1.C (g): Likewise. * g++.dg/init/static-init1.C (a): Likewise. * g++.dg/init/struct1.C (struct bug): Likewise. * g++.dg/init/struct2.C (saveOrLoad): Likewise. * g++.dg/init/struct3.C (foobar): Likewise. * g++.dg/torture/pr31579.C (a): Likewise. * g++.dg/torture/pr32563.C (i): Likewise. * g++.old-deja/g++.brendan/code-gen2.c (main): Use __UINTPTR_TYPE__ instead of __SIZE_TYPE__. * g++.old-deja/g++.other/temporary1.C: Likewise. * g++.old-deja/g++.pt/local1.C (setback): Likewise. * g++.old-deja/g++.pt/spec16.C (foo): Likewise. * g++.old-deja/g++.pt/local7.C (setback): Likewise. * g++.old-deja/g++.mike/net42.C (get_stat): Likewise. * g++.old-deja/g++.eh/ptr1.C (main): Likewise. * g++.old-deja/g++.brendan/crash64.C (uintptr_t): Define and use instead of size_t. * g++.old-deja/g++.abi/ptrmem.C (VPTE_SIZE): Define using a function pointer. 
diff --git a/gcc/testsuite/g++.dg/abi/offsetof.C b/gcc/testsuite/g++.dg/abi/offsetof.C index d6a53e6..68ba2d8 100644 --- a/gcc/testsuite/g++.dg/abi/offsetof.C +++ b/gcc/testsuite/g++.dg/abi/offsetof.C @@ -18,5 +18,5 @@ struct C: public B { }; int main () { - return ((__SIZE_TYPE__) ((C*)0)-i) != sizeof(void*); // { dg-warning offsetof|invalid } + return ((__UINTPTR_TYPE__) ((C*)0)-i) != sizeof(void*); // { dg-warning offsetof|invalid } } diff --git a/gcc/testsuite/g++.dg/compat/struct-layout-1_x1.h b/gcc/testsuite/g++.dg/compat/struct-layout-1_x1.h index e14433e..e304841 100644 --- a/gcc/testsuite/g++.dg/compat/struct-layout-1_x1.h +++ b/gcc/testsuite/g++.dg/compat/struct-layout-1_x1.h @@ -56,7 +56,7 @@ void test##n (void) \ info.als = __alignof__ (s##n);\ info.ala0 = __alignof__ (a##n[0]);\ info.ala3 = __alignof__ (a##n[3]);\ - if (((long) (__SIZE_TYPE__) a##n[3]) (info.als - 1)) \ + if (((long) (__UINTPTR_TYPE__) a##n[3]) (info.als - 1)) \ FAIL (n, 1); \ i = 0; j = 0; \ ops\ diff --git a/gcc/testsuite/g++.dg/debug/const2.C b/gcc/testsuite/g++.dg/debug/const2.C index 8e98f8b..3f075b8 100644 --- a/gcc/testsuite/g++.dg/debug/const2.C +++ b/gcc/testsuite/g++.dg/debug/const2.C @@ -12,4 +12,4 @@ struct b virtual bool IsEmpty() const=0; int e,c; }; -const int b::d = ((__SIZE_TYPE__)(((b*)1)-c) - 1); +const int b::d = ((__UINTPTR_TYPE__)(((b*)1)-c) - 1); diff --git a/gcc/testsuite/g++.dg/eh/alias1.C b/gcc/testsuite/g++.dg/eh/alias1.C index e6af383..fc1ed64 100644 --- a/gcc/testsuite/g++.dg/eh/alias1.C +++ b/gcc/testsuite/g++.dg/eh/alias1.C @@ -16,7 +16,7 @@ void g (int i) { if (!i_glob) -exit ((__SIZE_TYPE__) i); +exit ((__INTPTR_TYPE__) i); } static void diff --git a/gcc/testsuite/g++.dg/init/array11.C b/gcc/testsuite/g++.dg/init/array11.C index e52effe..594678a 100644 --- a/gcc/testsuite/g++.dg/init/array11.C +++ b/gcc/testsuite/g++.dg/init/array11.C @@ -9,11 +9,11 @@ int x; -typedef __SIZE_TYPE__ size_t; +typedef __UINTPTR_TYPE__ uintptr_t; struct gdt { -size_t 
a,b,c,d,e,f; +uintptr_t a,b,c,d,e,f; }; void f() { @@ -21,7 +21,7 @@ struct gdt gdt_table[2]= { { 0, - ( (((size_t)(x))(24))(-1(8)) ), + ( (((uintptr_t)(x))(24))(-1(8)) ), }, }; } diff --git a/gcc/testsuite/g++.dg/init/static-init1.C b/gcc/testsuite/g++.dg/init/static-init1.C index 298d171..6852dad 100644 --- a/gcc/testsuite/g++.dg/init/static-init1.C +++ b/gcc/testsuite/g++.dg/init/static-init1.C @@ -2,4 +2,4 @@ // Make sure we don't think we can initialize a at compile time. char c; -short a[] = { (short)((__PTRDIFF_TYPE__)c + (__PTRDIFF_TYPE__)c) }; +short a[] = { (short)((__UINTPTR_TYPE__)c + (__UINTPTR_TYPE__)c) }; diff --git a/gcc/testsuite/g++.dg/init/struct1.C b/gcc/testsuite/g++.dg/init/struct1.C index e23faef..834dca1 100644 --- a/gcc/testsuite/g++.dg/init/struct1.C +++ b/gcc/testsuite/g++.dg/init/struct1.C @@ -1,6
Fix an invalid C++ testcase
The pr8781.C testcase appears to have undefined behaviour as far as I can tell. A function called noop is called with argument pred; this creates a noop_t object which stores a reference to that argument, and then the function returns the object. At that point I think the reference becomes stale. This seems to be fixable by changing the pred argument to be a reference itself. Normally, inlining fixes up the situation, but this showed up as a problem on a port where inlining was more difficult than usual. Bootstrapped and tested on x86_64-linux. Ok? Bernd * g++.dg/tree-ssa/pr8781.C (noop): Make argument a reference. diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr8781.C b/gcc/testsuite/g++.dg/tree-ssa/pr8781.C index cc518a0..a6050c4 100644 --- a/gcc/testsuite/g++.dg/tree-ssa/pr8781.C +++ b/gcc/testsuite/g++.dg/tree-ssa/pr8781.C @@ -13,7 +13,7 @@ public: }; template<typename predicate> -inline noop_t<predicate> noop(const predicate pred) { +inline noop_t<predicate> noop(const predicate &pred) { return noop_t<predicate>(pred); }
powerpc64le-linux support
This is a first pass at getting powerpc64 little-endian support into shape. There are no doubt more bugs lurking, but this lets me build a little-endian C compiler and libgcc to start the little-endian bootstrap process. Bootstrapped and regression tested powerpc64-linux to ensure this doesn't break big-endian. OK for mainline? libgcc/ * config.host: Match little-endian powerpc-linux. gcc/ * config.gcc: Support little-endian powerpc-linux targets. * config/rs6000/linux.h (LINK_OS_LINUX_EMUL): Define. (LINK_OS_LINUX_SPEC): Define. * config/rs6000/linuxspe.h (TARGET_DEFAULT): Preserve MASK_LITTLE_ENDIAN. * config/rs6000/default64.h (TARGET_DEFAULT): Likewise. * config/rs6000/linuxaltivec.h (TARGET_DEFAULT): Likewise. * config/rs6000/linux64.h (OPTION_LITTLE_ENDIAN): Don't zero. (LINK_OS_LINUX_EMUL32, LINK_OS_LINUX_EMUL64): Define. (LINK_OS_LINUX_SPEC32, LINK_OS_LINUX_SPEC64): Use above. * config/rs6000/rs6000.c (output_toc): Don't use .tc for TARGET_ELF. Correct fp word order for little-endian. Don't shift toc entries smaller than a word for little-endian. * config/rs6000/rs6000.md (bswaphi2, bswapsi2 split): Comment. (bswapdi2 splits): Correct low-part subreg for little-endian. Remove wrong BYTES_BIG_ENDIAN tests, and rename vars to remove low/high where such is correct only for be. * config/rs6000/sysv4.h (SUBTARGET_OVERRIDE_OPTIONS): Allow little-endian for -mcall-aixdesc. 
Index: libgcc/config.host === --- libgcc/config.host (revision 198174) +++ libgcc/config.host (working copy) @@ -882,7 +882,7 @@ tmake_file=${tmake_file} rs6000/t-ppccomm rs6000/t-savresfgpr rs6000/t-crtstuff t-crtstuff-pic t-fdpbit extra_parts=$extra_parts crtbeginS.o crtendS.o crtbeginT.o ecrti.o ecrtn.o ncrti.o ncrtn.o ;; -powerpc-*-linux* | powerpc64-*-linux*) +powerpc*-*-linux*) tmake_file=${tmake_file} rs6000/t-ppccomm rs6000/t-savresfgpr rs6000/t-crtstuff rs6000/t-linux t-softfp-sfdf t-softfp-excl t-dfprules rs6000/t-ppc64-fp t-softfp t-slibgcc-libgcc extra_parts=$extra_parts ecrti.o ecrtn.o ncrti.o ncrtn.o md_unwind_header=rs6000/linux-unwind.h Index: gcc/config.gcc === --- gcc/config.gcc (revision 198174) +++ gcc/config.gcc (working copy) @@ -2086,20 +2086,24 @@ extra_options=${extra_options} rs6000/sysv4.opt tmake_file=rs6000/t-fprules rs6000/t-rtems t-rtems rs6000/t-ppccomm ;; -powerpc-*-linux* | powerpc64-*-linux*) +powerpc*-*-linux*) tm_file=${tm_file} dbxelf.h elfos.h freebsd-spec.h rs6000/sysv4.h extra_options=${extra_options} rs6000/sysv4.opt tmake_file=rs6000/t-fprules rs6000/t-ppcos ${tmake_file} rs6000/t-ppccomm + case ${target} in + powerpc*le-*-*) + tm_file=${tm_file} rs6000/sysv4le.h ;; + esac maybe_biarch=yes case ${target} in - powerpc64-*-linux*spe* | powerpc64-*-linux*paired*) + powerpc64*-*-linux*spe* | powerpc64*-*-linux*paired*) echo *** Configuration ${target} not supported 12 exit 1 ;; - powerpc-*-linux*spe* | powerpc-*-linux*paired*) + powerpc*-*-linux*spe* | powerpc*-*-linux*paired*) maybe_biarch= ;; - powerpc64-*-linux*) + powerpc64*-*-linux*) test x$with_cpu != x || cpu_is_64bit=yes maybe_biarch=always ;; Index: gcc/config/rs6000/linux.h === --- gcc/config/rs6000/linux.h (revision 198174) +++ gcc/config/rs6000/linux.h (working copy) @@ -79,6 +79,17 @@ #undef LINK_OS_DEFAULT_SPEC #define LINK_OS_DEFAULT_SPEC %(link_os_linux) +#if (TARGET_DEFAULT MASK_LITTLE_ENDIAN) +#define LINK_OS_LINUX_EMUL %{!mbig: %{!mbig-endian: -m 
elf32lppclinux}}%{mbig|mbig-endian: -m elf32ppclinux} +#else +#define LINK_OS_LINUX_EMUL %{!mlittle: %{!mlittle-endian: -m elf32ppclinux}}%{mlittle|mlittle-endian: -m elf32lppclinux} +#endif + +#undef LINK_OS_LINUX_SPEC +#define LINK_OS_LINUX_SPEC LINK_OS_LINUX_EMUL %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker GNU_USER_DYNAMIC_LINKER }} + #define LINK_GCC_C_SEQUENCE_SPEC \ %{static:--start-group} %G %L %{static:--end-group}%{!static:%G} Index: gcc/config/rs6000/linuxspe.h === --- gcc/config/rs6000/linuxspe.h(revision 198174) +++ gcc/config/rs6000/linuxspe.h(working copy) @@ -20,8 +20,13 @@ http://www.gnu.org/licenses/. */ /* Override rs6000.h and sysv4.h definition. */ +#if (TARGET_DEFAULT MASK_LITTLE_ENDIAN) #undef TARGET_DEFAULT +#define TARGET_DEFAULT
RFA: Fix declaration of default value of TARGET_NARROW_VOLATILE_BITFIELD
Hi Guys, The TARGET_NARROW_VOLATILE_BITFIELD target macro defaults to false: DEFHOOK (narrow_volatile_bitfield, , bool, (void), hook_bool_void_false) but the documentation currently states that it defaults to !TARGET_STRICT_ALIGN. The patch below corrects this, and also fixes a small typo in the description of TARGET_CANONICALIZE_COMPARISON. Ok to apply ? Cheers Nick gcc/ChangeLog 2013-04-24 Nick Clifton ni...@redhat.com * doc/tm.texi.in (TARGET_NARROW_VOLATILE_BITFIELD): Change the default to false. (TARGET_CANONICALIZE_COMPARISON): Correct typo. * doc/tm.texi: Regenerate. Index: gcc/doc/tm.texi.in === --- gcc/doc/tm.texi.in (revision 198216) +++ gcc/doc/tm.texi.in (working copy) @@ -1236,7 +1236,7 @@ should use the narrowest mode possible. It should return @code{false} if these accesses should use the bitfield container type. -The default is @code{!TARGET_STRICT_ALIGN}. +The default is @code{false}. @end deftypefn @hook TARGET_MEMBER_TYPE_FORCES_BLK @@ -5947,7 +5947,7 @@ valid but will see if the resulting insn matches a pattern in the @file{md} file. -You need not to implement this hook if it would never change the +You need not implement this hook if it would never change the comparison code or operands. @end deftypefn
vtables patch 1/3: allow empty array initializations
This is a patch series that changes the way vtables are constructed in the C++ frontend (the first two are small preliminary patches, the meat will be in 3/3). The problem I was trying to solve was that on the port I was working on, function pointers are smaller than data pointers, and size_t is smaller than a data pointer too. All three kinds of types are used in vtables. The C++ frontend assumes all three sizes are identical and constructs references to them by just multiplying an index with a (constant) size. (There is a target macro to add extra padding to the data members, but it's unsuitable for the situation I described, and in any case wastes too much space to be useful.) This patch series changes that so that along with a vtable and its initializers, we build up a list of structure fields and create a vtable type from these. We can then access the various elements by using COMPONENT_REFs without having to know exactly what the offsets are. Incidentally, libsupc++ already uses a struct vtable_prefix to describe the layout. I think this is a cleanup, but since the port won't be contributed I could understand if the C++ maintainers don't want to risk this patch set. If it doesn't go in, maybe it will be useful as a reference for others when porting to a similar target. There is one assumption here that needs to be pointed out, and one little piece of ugliness. The assumption is that on current targets, size_type_node is always the same size as const_ptr_type_node (i.e. sizeof (void *) == sizeof (size_t)), and also that they have the same alignment. This is required because the data fields of the vtable are now created using a size_type_node. If someone knows of a target where this isn't true, it would be helpful to know. Given the size_t/uintptr_t testsuite patch I just submitted I'm thinking they don't exist, but I'm kind of wondering about m32c, so Cc'ing DJ. 
The small ugliness is that we must allow empty arrays in the middle of structures since we must be able to take the address of such an array field to get an object's vtable pointer. GCC seems to have no problems with this concept in general, but since 4.5 (which I was working on) one little problem has crept in: we crash in varasm.c when finding such an array while initializing the vtable. This is addressed by the following small preliminary patch which essentially just restores the previous code. Bootstrapped and tested on x86_64-linux, ok? Bernd commit 618d06f7d414842a934fb360fa98972478e13483 Author: Bernd Schmidt ber...@codesourcery.com Date: Tue Apr 23 15:19:07 2013 +0200 Allow empty arrays to be initialized This undoes an earlier change in output_constructor_regular_field so that we no longer crash when a zero-size array is initialized. This is in preparation for changes to the way C++ vtables are laid out. * varasm.c (output_constructor_regular_field): Don't crash for arrays with empty DECL_SIZE_UNIT. diff --git a/gcc/varasm.c b/gcc/varasm.c index 2532d80..830fdd0 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -4833,7 +4833,7 @@ output_constructor_regular_field (oc_local_state *local) better be last. */ gcc_assert (!fieldsize || !DECL_CHAIN (local-field)); } - else + else if (DECL_SIZE_UNIT (local-field)) fieldsize = tree_low_cst (DECL_SIZE_UNIT (local-field), 1); } else
rs6000_emit_set_long_const tidy
This just removes some unnecessary tests. It's easy to see that if the sign bit isn't set then xor 0x8000 followed by subtract 0x8000 leaves the value unchanged. Bootstrapped etc. powerpc-linux. OK for mainline? * config/rs6000/rs6000.c (rs6000_emit_set_long_const): Tidy. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 198174) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -7104,21 +7104,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_IN if ((ud4 == 0x ud3 == 0x ud2 == 0x (ud1 0x8000)) || (ud4 == 0 ud3 == 0 ud2 == 0 ! (ud1 0x8000))) - { - if (ud1 0x8000) - emit_move_insn (dest, GEN_INT (((ud1 ^ 0x8000) - 0x8000))); - else - emit_move_insn (dest, GEN_INT (ud1)); - } + emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000)); else if ((ud4 == 0x ud3 == 0x (ud2 0x8000)) || (ud4 == 0 ud3 == 0 ! (ud2 0x8000))) { - if (ud2 0x8000) - emit_move_insn (dest, GEN_INT (((ud2 16) ^ 0x8000) - - 0x8000)); - else - emit_move_insn (dest, GEN_INT (ud2 16)); + emit_move_insn (dest, GEN_INT (((ud2 16) ^ 0x8000) +- 0x8000)); if (ud1 != 0) emit_move_insn (copy_rtx (dest), gen_rtx_IOR (DImode, copy_rtx (dest), @@ -7141,12 +7133,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_IN else if ((ud4 == 0x (ud3 0x8000)) || (ud4 == 0 ! 
(ud3 0x8000))) { - if (ud3 0x8000) - emit_move_insn (dest, GEN_INT (((ud3 16) ^ 0x8000) - - 0x8000)); - else - emit_move_insn (dest, GEN_INT (ud3 16)); - + emit_move_insn (dest, GEN_INT (((ud3 16) ^ 0x8000) +- 0x8000)); if (ud2 != 0) emit_move_insn (copy_rtx (dest), gen_rtx_IOR (DImode, copy_rtx (dest), @@ -7161,12 +7149,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_IN } else { - if (ud4 0x8000) - emit_move_insn (dest, GEN_INT (((ud4 16) ^ 0x8000) - - 0x8000)); - else - emit_move_insn (dest, GEN_INT (ud4 16)); - + emit_move_insn (dest, GEN_INT (((ud4 16) ^ 0x8000) +- 0x8000)); if (ud3 != 0) emit_move_insn (copy_rtx (dest), gen_rtx_IOR (DImode, copy_rtx (dest), @@ -7181,7 +7165,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_IN GEN_INT (ud2 16))); if (ud1 != 0) emit_move_insn (copy_rtx (dest), - gen_rtx_IOR (DImode, copy_rtx (dest), GEN_INT (ud1))); + gen_rtx_IOR (DImode, copy_rtx (dest), +GEN_INT (ud1))); } } return dest; -- Alan Modra Australia Development Lab, IBM
vtables patch 2/3: fold vtable refs that use COMPONENT_REFs
This is another preliminary piece. It teaches some folding code that vtable references could contain COMPONENT_REFs and how to deal with that. Bootstrapped and tested on x86_64-linux, both with and without the final vtables patch. Ok? Bernd commit d1b84c0bbdb1816b7173dca29486bc88124e4641 Author: Bernd Schmidt ber...@codesourcery.com Date: Tue Apr 23 15:17:34 2013 +0200 Allow virtual table refs to be COMPONENT_REFs This is a preparation patch for changing the C++ vtable layout. It makes gimple-fold aware of how to fold a vtable reference that is a COMPONENT_REF. * gimple-fold.c (gimple_get_virt_method_for_binfo): Allow COMPONENT_REFs. diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index b9211a9..e2364bf 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -3118,16 +3118,25 @@ gimple_get_virt_method_for_binfo (HOST_WIDE_INT token, tree known_binfo) if (TREE_CODE (v) != ADDR_EXPR) return NULL_TREE; v = TREE_OPERAND (v, 0); + tree v_outer = v; + if (TREE_CODE (v_outer) == COMPONENT_REF) +{ + tree field = TREE_OPERAND (v_outer, 1); + offset += tree_low_cst (DECL_FIELD_OFFSET (field), 1) * BITS_PER_UNIT; + offset += tree_low_cst (DECL_FIELD_BIT_OFFSET (field), 1); + v_outer = TREE_OPERAND (v_outer, 0); +} - if (TREE_CODE (v) != VAR_DECL - || !DECL_VIRTUAL_P (v) - || !DECL_INITIAL (v) - || DECL_INITIAL (v) == error_mark_node) + if (TREE_CODE (v_outer) != VAR_DECL + || !DECL_VIRTUAL_P (v_outer) + || !DECL_INITIAL (v_outer) + || DECL_INITIAL (v_outer) == error_mark_node) return NULL_TREE; gcc_checking_assert (TREE_CODE (TREE_TYPE (v)) == ARRAY_TYPE); size = tree_low_cst (TYPE_SIZE (TREE_TYPE (TREE_TYPE (v))), 1); offset += token * size; - fn = fold_ctor_reference (TREE_TYPE (TREE_TYPE (v)), DECL_INITIAL (v), + fn = fold_ctor_reference (TREE_TYPE (TREE_TYPE (v)), + DECL_INITIAL (v_outer), offset, size, vtable); if (!fn || integer_zerop (fn)) return NULL_TREE;
driver-rs6000.c comment pasto
Committed as obvious. * config/rs6000/driver-rs6000.c (elf_dcachebsize): Fix comment pasto. Index: gcc/config/rs6000/driver-rs6000.c === --- gcc/config/rs6000/driver-rs6000.c (revision 198174) +++ gcc/config/rs6000/driver-rs6000.c (working copy) @@ -190,7 +190,7 @@ return NULL; } -/* Returns AT_PLATFORM if present, otherwise generic 32. */ +/* Returns AT_DCACHEBSIZE if present, otherwise generic 32. */ static int elf_dcachebsize (void) -- Alan Modra Australia Development Lab, IBM
vtables patch 3/3
This is the final piece which contains the modifications to the C++ frontend. Bootstrapped and tested on x86_64-linux, including compat tests against the installed compiler. I've also built an ia64-hpux (the only target defining TARGET_VTABLE_DATA_ENTRY_DISTANCE) cc1plus and verified that at least it doesn't crash on some vtable testcases, but realistically I have no way of properly testing that target. Ok? Bernd commit d6957d8e1ad8179c01d373f741099b021a05730f Author: Bernd Schmidt ber...@codesourcery.com Date: Mon Apr 15 11:24:04 2013 +0200 Change handling of vtables to use fields and COMPONENT_REFs The C++ frontend accesses the various parts of the vtable using positive and negative indices from the vtable pointer. This assumes that all fields in the vtable have the same size. Change this to build structure decls instead and access data with COMPONENT_REFs of the fields. gcc/cp/ * class.c (struct vtbl_init_data_s): Add new member FIELD. Change INDEX to an int. (build_vtbl_initializer, layout_vtable_decl, accumulate_vtbl_inits, dfs_accumulate_vtbl_inits): Adjust declarations. (dump_array, initialize_vtable): Remove unnecessary declarations. (vfunc_array_vtbl_offset): New static function. (build_vtbl_ref_1): Remove idx argument, add new argument outer. Return only a pointer to the vtbl or a pointer to its array member, depending on outer. All callers changed. (build_vtbl_rtti_offset_ref, build_vtbl_rtti_object_ref): New functions. (build_vtbl_ref): Build the array reference here. (build_vfn_ref): Use build_vtbl_ref, not build_vtbl_ref_1. (layout_vtable_decl): Remove argument n, add new argument fields. All callers changed. Make a record type instead of an array. (vbase_offset_from_index): New function. (get_vtbl_decl_for_binfo): Adjust to expect an ADDR_EXPR of a COMPONENT_REF. (dump_array): New arguments inits and off. All callers changed. Use the inits instead of DECL_INITIAL. Print offsets starting from off. 
Print different header depending on the type of the decl. (dump_record): New static function. (dump_vtable): Use it instead of dump_array. (initialize_vtable): Move before users. Add new arg fields. All callers changed. (finish_vtbls): Create an empty fields vector and pass it to accumulate_vtbl_inits. (build_ctor_vtbl_group): Likewise. Use layout_vtable_decl. (accumulate_vtbl_inits): Add new arg fields. All callers changed. (dfs_accumulate_vtbl_inits): Likewise. Build an ADDR_EXPR of a COMPONENT_REF to get the right vtbl pointer. Truncate fields along with inits if necessary. (build_vtbl_initializer): Add new arg fields. All callers changed. Use integer arithmetic for the vid.index field. Add extra fields for alignment padding if necessary. Create and append a FIELD_DECL for the array. (build_vbase_offset_vtbl_entries): Use integer arithmetic for vid-index. Use vbase_offset_from_index. Create a FIELD_DECL corresponding to the initializer. Remove cast to vtable_entry_type. (add_vcall_offset): Use integer arithmetic for vid-index. Remove cast to vtable_entry_type. Create a FIELD_DECL corresponding to the initializer. (build_rtti_vtbl_entries): Use null_pointer_node instead of integer_zero_node. Don't cast to vfunc_ptr_type_node. Create a FIELD_DECL corresponding to the initializer. * decl.c (initialize_predefined_identifiers): Add voffset, vti and array identifiers. (create_vtable_type): New static function. (cxx_init_decl_processing): Use it to create vtbl_type_node. Set vtbl_array_type_node to the array type. * decl2.c (mark_vtable_elt): New static function. (mark_vtable_entries): Use it. Look inside CONSTRUCTORs for elements that need marking. * rtti.c (build_headof): Use build_vtbl_rtti_offset_ref and build_vtbl_rtti_object_ref. (tinfo_base_init): Use the address of a COMPONENT_REF to find the right offset. * method.c (make_thunk): Use vbase_offset_from_index. 
* cp-tree.h (enum cp_tree_index): Add CPTI_VTBL_ARRAY_TYPE, CPTI_VOFFSET_IDENTIFIER, CPTI_VTI_IDENTIFIER, CPTI_ARRAY_IDENTIFIER. (vtbl_array_type_node, voffset_identifier, vti_identifier, array_identifier): Define. (vbase_offset_from_index, build_vtbl_rtti_offset_ref, build_vtbl_rti_object_ref): Declare. diff --git a/gcc/cp/class.c b/gcc/cp/class.c index 58248bf..fbc712b 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -77,6 +77,8 @@ typedef struct vtbl_init_data_s /* The negative-index vtable initializers built up so far. These are in order from least negative index to most negative index. */ vecconstructor_elt, va_gc *inits; + /* FIELD_DECLs corresponding to the previous set of initializers. */ + vectree, va_gc *fields; /* The binfo
[C++ Patch] PR 56970
Hi, one of those cases where we error out in a SFINAE context because the tsubst_flags_t isn't propagated far enough. The straightforward patch works fine mainline and 4_8-branch. Tested x86_64-linux. Thanks, Paolo. /cp 2013-04-24 Paolo Carlini paolo.carl...@oracle.com PR c++/56970 * init.c (build_offset_ref): Add tsubst_flags_t parameter. * semantics.c (finish_qualified_id_expr): Likewise. (finish_id_expression): Update. * typeck.c (cp_build_addr_expr_1): Likewise. * pt.c (tsubst_qualified_id, resolve_nondeduced_context): Likewise. * cp-tree.h: Update declarations. /testsuite 2013-04-24 Paolo Carlini paolo.carl...@oracle.com PR c++/56970 * g++.dg/cpp0x/sfinae45.C: New. Index: cp/cp-tree.h === --- cp/cp-tree.h(revision 198220) +++ cp/cp-tree.h(working copy) @@ -5351,7 +5351,8 @@ extern tree get_type_value(tree); extern tree build_zero_init(tree, tree, bool); extern tree build_value_init (tree, tsubst_flags_t); extern tree build_value_init_noctor(tree, tsubst_flags_t); -extern tree build_offset_ref (tree, tree, bool); +extern tree build_offset_ref (tree, tree, bool, +tsubst_flags_t); extern tree build_new (vectree, va_gc **, tree, tree, vectree, va_gc **, int, tsubst_flags_t); @@ -5748,7 +5749,7 @@ extern void add_typedef_to_current_template_for_ac location_t); extern void check_accessibility_of_qualified_id (tree, tree, tree); extern tree finish_qualified_id_expr (tree, tree, bool, bool, -bool, bool); +bool, bool, tsubst_flags_t); extern void simplify_aggr_init_expr(tree *); extern void finalize_nrv (tree *, tree, tree); extern void note_decl_for_pch (tree); Index: cp/init.c === --- cp/init.c (revision 198220) +++ cp/init.c (working copy) @@ -1817,7 +1817,8 @@ get_type_value (tree name) @@ This function should be rewritten and placed in search.c. 
*/ tree -build_offset_ref (tree type, tree member, bool address_p) +build_offset_ref (tree type, tree member, bool address_p, + tsubst_flags_t complain) { tree decl; tree basebinfo = NULL_TREE; @@ -1841,7 +1842,8 @@ tree type = TYPE_MAIN_VARIANT (type); if (!COMPLETE_OR_OPEN_TYPE_P (complete_type (type))) { - error (incomplete type %qT does not have member %qD, type, member); + if (complain tf_error) + error (incomplete type %qT does not have member %qD, type, member); return error_mark_node; } @@ -1854,7 +1856,8 @@ tree if (TREE_CODE (member) == FIELD_DECL DECL_C_BIT_FIELD (member)) { - error (invalid pointer to bit-field %qD, member); + if (complain tf_error) + error (invalid pointer to bit-field %qD, member); return error_mark_node; } @@ -1883,10 +1886,10 @@ tree if (address_p DECL_P (t) DECL_NONSTATIC_MEMBER_P (t)) perform_or_defer_access_check (TYPE_BINFO (type), t, t, - tf_warning_or_error); + complain); else perform_or_defer_access_check (basebinfo, t, t, - tf_warning_or_error); + complain); if (DECL_STATIC_FUNCTION_P (t)) return t; @@ -1900,7 +1903,7 @@ tree check_accessibility_of_qualified_id in case it is a pointer to non-static member. */ perform_or_defer_access_check (TYPE_BINFO (type), member, member, - tf_warning_or_error); + complain); if (!address_p) { @@ -1932,15 +1935,17 @@ tree if (flag_ms_extensions) { PTRMEM_OK_P (member) = 1; - return cp_build_addr_expr (member, tf_warning_or_error); + return cp_build_addr_expr (member, complain); } - error (invalid use of non-static member function %qD, -TREE_OPERAND (member, 1)); + if (complain tf_error) + error (invalid use of non-static member function %qD, + TREE_OPERAND (member, 1)); return error_mark_node; } else if (TREE_CODE (member) == FIELD_DECL) { - error (invalid use of non-static data member %qD, member); +
Re: patch to fix constant math -5th patch, rtl
On Tue, Apr 16, 2013 at 10:17 PM, Kenneth Zadeck zad...@naturalbridge.com wrote: Here is a refreshed version of the rtl changes for wide-int. the only change from the previous versions is that the wide-int binary operations have been simplified to use the new wide-int binary templates. Looking for from_rtx calls (to see where we get the mode/precision from) I see for example - o = rtx_to_double_int (outer); - i = rtx_to_double_int (inner); - - m = double_int::mask (width); - i = m; - m = m.llshift (offset, HOST_BITS_PER_DOUBLE_INT); - i = i.llshift (offset, HOST_BITS_PER_DOUBLE_INT); - o = o.and_not (m) | i; - + + o = (wide_int::from_rtx (outer, GET_MODE (SET_DEST (temp))) + .insert (wide_int::from_rtx (inner, GET_MODE (dest)), + offset, width)); where I'd rather have the original code preserved as much as possible and not introduce a new primitive wide_int::insert for this. The conversion and review process will be much more error-prone if we do multiple things at once (and it might keep the wide_int initial interface leaner). Btw, the wide_int::insert implementation doesn't assert anything about the inputs precision. Instead it reads + if (start + width = precision) +width = precision - start; + + mask = shifted_mask (start, width, false, precision); + tmp = op0.lshift (start, 0, precision, NONE); + result = tmp mask; + + tmp = and_not (mask); + result = result | tmp; which eventually ends up performing everything in target precision. So we don't really care about the mode or precision of inner. Then I see diff --git a/gcc/dwarf2out.h b/gcc/dwarf2out.h index ad03a34..531a7c1 100644 @@ -180,6 +182,7 @@ typedef struct GTY(()) dw_val_struct { HOST_WIDE_INT GTY ((default)) val_int; unsigned HOST_WIDE_INT GTY ((tag (dw_val_class_unsigned_const))) val_unsigned; double_int GTY ((tag (dw_val_class_const_double))) val_double; + wide_int GTY ((tag (dw_val_class_wide_int))) val_wide; dw_vec_const GTY ((tag (dw_val_class_vec))) val_vec; struct dw_val_die_union { ick. 
That makes dw_val_struct really large ... (and thus dw_attr_struct). You need to make this a pointer to a wide_int at least. -/* Return a CONST_INT or CONST_DOUBLE corresponding to target reading +/* Return a constant integer corresponding to target reading GET_MODE_BITSIZE (MODE) bits from string constant STR. */ static rtx c_readstr (const char *str, enum machine_mode mode) { - HOST_WIDE_INT c[2]; + wide_int c; ... - return immed_double_const (c[0], c[1], mode); + + c = wide_int::from_array (tmp, len, mode); + return immed_wide_int_const (c, mode); } err - what's this good for? It doesn't look necessary as part of the initial wide-int conversion at least. (please audit your patches for such cases) @@ -4994,12 +4999,12 @@ expand_builtin_signbit (tree exp, rtx target) if (bitpos GET_MODE_BITSIZE (rmode)) { - double_int mask = double_int_zero.set_bit (bitpos); + wide_int mask = wide_int::set_bit_in_zero (bitpos, rmode); if (GET_MODE_SIZE (imode) GET_MODE_SIZE (rmode)) temp = gen_lowpart (rmode, temp); temp = expand_binop (rmode, and_optab, temp, - immed_double_int_const (mask, rmode), + immed_wide_int_const (mask, rmode), NULL_RTX, 1, OPTAB_LIB_WIDEN); } else Likewise. I suppose you remove immed_double_int_const but I see no reason to do that. It just makes your patch larger than necessary. [what was the reason again to have TARGET_SUPPORTS_WIDE_INT at all? It's supposed to be a no-op conversion, right?] 
@@ -95,38 +95,9 @@ plus_constant (enum machine_mode mode, rtx x, HOST_WIDE_INT c) switch (code) { -case CONST_INT: - if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT) - { - double_int di_x = double_int::from_shwi (INTVAL (x)); - double_int di_c = double_int::from_shwi (c); - - bool overflow; - double_int v = di_x.add_with_sign (di_c, false, &overflow); - if (overflow) - gcc_unreachable (); - - return immed_double_int_const (v, VOIDmode); - } - - return GEN_INT (INTVAL (x) + c); - -case CONST_DOUBLE: - { - double_int di_x = double_int::from_pair (CONST_DOUBLE_HIGH (x), -CONST_DOUBLE_LOW (x)); - double_int di_c = double_int::from_shwi (c); - - bool overflow; - double_int v = di_x.add_with_sign (di_c, false, &overflow); - if (overflow) - /* Sorry, we have no way to represent overflows this wide. -To fix, add constant support wider than CONST_DOUBLE. */ - gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_DOUBLE_INT); - - return immed_double_int_const (v, VOIDmode); - } - +CASE_CONST_SCALAR_INT: + return
[PATCH] Prune SCEV
I noticed that some functions in SCEV aren't used at all. They were added in 4.0, but never used since. Regtested/bootstrapped on x86_64-linux, ok for trunk? 2013-04-24 Marek Polacek pola...@redhat.com * tree-scalar-evolution.h (analyze_scalar_evolution): Remove. * tree-scalar-evolution.c (get_exit_conditions_rec): Likewise. (select_loops_exit_conditions): Likewise. (number_of_iterations_for_all_loops): Likewise. (analyze_scalar_evolution_for_all_loop_phi_nodes): Likewise. (scev_analysis): Likewise. --- gcc/tree-scalar-evolution.h.mp 2013-04-24 12:22:18.591373400 +0200 +++ gcc/tree-scalar-evolution.h 2013-04-24 12:23:46.426701201 +0200 @@ -34,7 +34,6 @@ extern tree analyze_scalar_evolution (st extern tree instantiate_scev (basic_block, struct loop *, tree); extern tree resolve_mixers (struct loop *, tree); extern void gather_stats_on_scev_database (void); -extern void scev_analysis (void); extern unsigned int scev_const_prop (void); extern bool expression_expensive_p (tree); extern bool simple_iv (struct loop *, struct loop *, tree, affine_iv *, bool); --- gcc/tree-scalar-evolution.c.mp 2013-04-24 12:22:18.587373384 +0200 +++ gcc/tree-scalar-evolution.c 2013-04-24 12:32:54.044328690 +0200 @@ -868,39 +868,6 @@ get_loop_exit_condition (const struct lo return res; } -/* Recursively determine and enqueue the exit conditions for a loop. */ - -static void -get_exit_conditions_rec (struct loop *loop, -vecgimple *exit_conditions) -{ - if (!loop) -return; - - /* Recurse on the inner loops, then on the next (sibling) loops. */ - get_exit_conditions_rec (loop-inner, exit_conditions); - get_exit_conditions_rec (loop-next, exit_conditions); - - if (single_exit (loop)) -{ - gimple loop_condition = get_loop_exit_condition (loop); - - if (loop_condition) - exit_conditions-safe_push (loop_condition); -} -} - -/* Select the candidate loop nests for the analysis. This function - initializes the EXIT_CONDITIONS array. 
*/ - -static void -select_loops_exit_conditions (vecgimple *exit_conditions) -{ - struct loop *function_body = current_loops-tree_root; - - get_exit_conditions_rec (function_body-inner, exit_conditions); -} - /* Depth first search algorithm. */ @@ -2882,41 +2849,6 @@ number_of_exit_cond_executions (struct l return ret; } -/* One of the drivers for testing the scalar evolutions analysis. - This function computes the number of iterations for all the loops - from the EXIT_CONDITIONS array. */ - -static void -number_of_iterations_for_all_loops (vecgimple *exit_conditions) -{ - unsigned int i; - unsigned nb_chrec_dont_know_loops = 0; - unsigned nb_static_loops = 0; - gimple cond; - - FOR_EACH_VEC_ELT (*exit_conditions, i, cond) -{ - tree res = number_of_latch_executions (loop_containing_stmt (cond)); - if (chrec_contains_undetermined (res)) - nb_chrec_dont_know_loops++; - else - nb_static_loops++; -} - - if (dump_file) -{ - fprintf (dump_file, \n(\n); - fprintf (dump_file, -\n); - fprintf (dump_file, %d\tnb_chrec_dont_know_loops\n, nb_chrec_dont_know_loops); - fprintf (dump_file, %d\tnb_static_loops\n, nb_static_loops); - fprintf (dump_file, %d\tnb_total_loops\n, number_of_loops ()); - fprintf (dump_file, -\n); - fprintf (dump_file, )\n\n); - - print_loops (dump_file, 3); -} -} - /* Counters for the stats. */ @@ -3028,54 +2960,6 @@ gather_chrec_stats (tree chrec, struct c fprintf (dump_file, )\n); } -/* One of the drivers for testing the scalar evolutions analysis. - This function analyzes the scalar evolution of all the scalars - defined as loop phi nodes in one of the loops from the - EXIT_CONDITIONS array. - - TODO Optimization: A loop is in canonical form if it contains only - a single scalar loop phi node. All the other scalars that have an - evolution in the loop are rewritten in function of this single - index. This allows the parallelization of the loop. 
*/ - -static void -analyze_scalar_evolution_for_all_loop_phi_nodes (vecgimple *exit_conditions) -{ - unsigned int i; - struct chrec_stats stats; - gimple cond, phi; - gimple_stmt_iterator psi; - - reset_chrecs_counters (stats); - - FOR_EACH_VEC_ELT (*exit_conditions, i, cond) -{ - struct loop *loop; - basic_block bb; - tree chrec; - - loop = loop_containing_stmt (cond); - bb = loop-header; - - for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (psi)) - { - phi = gsi_stmt (psi); - if (!virtual_operand_p (PHI_RESULT (phi))) - { - chrec = instantiate_parameters - (loop, -analyze_scalar_evolution (loop, PHI_RESULT (phi))); - - if (dump_file (dump_flags
Re: RFA: replace MD_REDIRECT_BRANCH with TARGET_CAN_FOLLOW_JUMP
Joern Rennecke joern.renne...@embecosm.com wrote: As discussed in the comments to PR38449, this patch replaces MD_REDIRECT_BRANCH with TARGET_CAN_FOLLOW_JUMP bootstrapped / regtested on i686-pc-linux-gnu cross-built / regtested for i686-pc-linux-gnu X sh-elf FYI, I've tested the patch with the top level make -k check also on sh4-unknown-linux-gnu with no new failures. SH portion looks fine. Regards, kaz
Re: patch to fix constant math -5th patch, rtl
Richard Biener richard.guent...@gmail.com writes: Can we in such cases please do a preparatory patch and change the CONST_INT/CONST_DOUBLE paths to do an explicit [sz]ext to mode precision first? I'm not sure what you mean here. CONST_INT HWIs are already sign-extended from mode precision to HWI precision. The 8-bit value 0b10000000 must be represented as (const_int -128); nothing else is allowed. E.g. (const_int 128) is not a valid QImode value on BITS_PER_UNIT==8 targets. What does wide-int do with VOIDmode mode inputs? It seems to ICE on them for from_rtx and use garbage (0) for from_shwi. Ugh. ICEing is right. As mentioned before, every rtx constant has a mode, whether it's stored in the rtx or not. Callers must keep track of what that mode is. Btw, plus_constant asserts that mode is either VOIDmode (I suppose semantically do arbitrary precision) No, not arbitrary precision. It's always the precision specified by the mode parameter. The assert is: gcc_assert (GET_MODE (x) == VOIDmode || GET_MODE (x) == mode); This is because GET_MODE always returns VOIDmode for CONST_INT and CONST_DOUBLE integers. The mode parameter is needed to tell us what precision those CONST_INTs and CONST_DOUBLEs actually have, because the rtx itself doesn't tell us. The mode parameter serves no purpose beyond that. So if the rtx does specify a mode (everything except CONST_INT and CONST_DOUBLE), the assert is making sure that the caller has correctly tracked the rtx's mode and provided the right mode parameter. The caller must do that for all rtxes, it's just that we can't assert for it in the CONST_INT and CONST_DOUBLE case, because the rtx has no mode to check against. If CONST_INT and CONST_DOUBLE did have a mode to check against, there would be no need for the mode parameter at all. Likewise there would be no need for wide_int::from_rtx to have a mode parameter. Richard
Re: [PATCH] Generate a label for the split cold function while using -freorder-blocks-and-partition
On Tue, Apr 23, 2013 at 9:59 PM, Jakub Jelinek ja...@redhat.com wrote: On Tue, Apr 23, 2013 at 03:58:06PM -0700, Sriraman Tallam wrote: This patch generates labels for cold function parts that are split when using the option -freorder-blocks-and-partition. The cold label name is generated by suffixing .cold to the assembler name of the hot function. This is useful when getting back traces from gdb when the cold function part does get executed. * final.c (final_scan_insn): Generate cold label name by suffixing .cold to function's assembler name. * gcc.dg/tree-prof/cold_partition_label.c: New test. This doesn't honor NO_DOT_IN_LABEL (and NO_DOLLAR_IN_LABEL). Also, don't some function start in cold section and then switch into hot section? That's a good question - with the current trunk implementation it can, but I have a patch that fixes this and many other bugs I found when trying to use -freorder-blocks-and-partition. I had sent the patch out for review awhile ago but it still needs review. See: http://gcc.gnu.org/ml/gcc-patches/2012-11/msg01303.html http://gcc.gnu.org/ml/gcc-patches/2012-11/msg02141.html (most recent version of patch). Teresa Jakub -- Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
Re: [C++ Patch] PR 56970
OK. Jason
change_address in rs6000_secondary_reload_inner
This one is in response to a comment made by Ulrich Weigand. Refer http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01804.html for why replace_equiv_address_nv is better than change_address in this reload related function. Bootstrapped etc. powerpc64-linux. * config/rs6000/rs6000.c (rs6000_secondary_reload_inner): Use replace_equiv_address_nv. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 198174) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -14433,7 +14417,7 @@ /* Adjust the address if it changed. */ if (addr != XEXP (mem, 0)) { - mem = change_address (mem, mode, addr); + mem = replace_equiv_address_nv (mem, addr); if (TARGET_DEBUG_ADDR) fprintf (stderr, \nrs6000_secondary_reload_inner, mem adjusted.\n); } -- Alan Modra Australia Development Lab, IBM
[Ada] gnatbind: improve messages in -v mode
This patch improves the messages that get printed by gnatbind when it is invoked with the -v switch. Tested on x86_64-pc-linux-gnu, committed on trunk 2013-04-24 Bob Duff d...@adacore.com * ali-util.ads (Source_Record): New component Stamp_File to record from whence the Stamp came. * ali-util.adb (Set_Source_Table): Set Stamp_File component. * bcheck.adb (Check_Consistency): Print additional information in Verbose_Mode. * gnatbind.adb (Gnatbind): Print additional information in Verbose_Mode. Index: ali-util.adb === --- ali-util.adb(revision 198221) +++ ali-util.adb(working copy) @@ -35,6 +35,8 @@ with Stringt; with Styleg; +with System.OS_Lib; use System.OS_Lib; + package body ALI.Util is -- Empty procedures needed to instantiate Scng. Error procedures are @@ -359,6 +361,7 @@ if Stamp (Stamp'First) /= ' ' then Source.Table (S).Stamp := Stamp; Source.Table (S).Source_Found := True; + Source.Table (S).Stamp_File := F; -- If we could not find the file, then the stamp is set -- from the dependency table entry (to be possibly reset @@ -367,6 +370,7 @@ else Source.Table (S).Stamp := Sdep.Table (D).Stamp; Source.Table (S).Source_Found := False; + Source.Table (S).Stamp_File := ALIs.Table (A).Afile; -- In All_Sources mode, flag error of file not found @@ -380,8 +384,9 @@ -- is off, so simply initialize the stamp from the Sdep entry else + Source.Table (S).Stamp := Sdep.Table (D).Stamp; Source.Table (S).Source_Found := False; - Source.Table (S).Stamp := Sdep.Table (D).Stamp; + Source.Table (S).Stamp_File := ALIs.Table (A).Afile; end if; -- Here if this is not the first time for this source file, @@ -407,13 +412,19 @@ -- source file even if Check_Source_Files is false, since -- if we find it, then we can use it to resolve which of the -- two timestamps in the ALI files is likely to be correct. + -- We only look in the current directory, because when + -- Check_Source_Files is false, other search directories are + -- likely to be incorrect. 
- if not Check_Source_Files then + if not Check_Source_Files +and then Is_Regular_File (Get_Name_String (F)) + then Stamp := Source_File_Stamp (F); if Stamp (Stamp'First) /= ' ' then Source.Table (S).Stamp := Stamp; Source.Table (S).Source_Found := True; +Source.Table (S).Stamp_File := F; end if; end if; @@ -432,6 +443,7 @@ else if Sdep.Table (D).Stamp Source.Table (S).Stamp then Source.Table (S).Stamp := Sdep.Table (D).Stamp; +Source.Table (S).Stamp_File := ALIs.Table (A).Afile; end if; end if; end if; Index: ali-util.ads === --- ali-util.ads(revision 198221) +++ ali-util.ads(working copy) @@ -6,7 +6,7 @@ -- -- -- S p e c -- -- -- --- Copyright (C) 1992-2011, Free Software Foundation, Inc. -- +-- Copyright (C) 1992-2013, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -57,6 +57,13 @@ -- located and the Stamp value was set from the actual source file. -- It is always false if Check_Source_Files is not set. + Stamp_File : File_Name_Type; + -- File that Stamp came from. If Source_Found is True, then Stamp is the + -- timestamp of the source file, and this is the name of the source + -- file. If Source_Found is False, then Stamp comes from a dependency + -- line in an ALI file, this is the name of that ALI file. Used only in + -- verbose mode, for messages. + All_Timestamps_Match : Boolean; -- This flag is set only if all files referencing this source file --