[PATCH] RISC-V: Fix bugs of available condition.

2023-01-02 Thread juzhe . zhong
From: Ju-Zhe Zhong 

Suppose there are 2 demand infos:

Demand 1: demand TAIL.
Demand 2: not demand TAIL.

If a block has demand 1, we should treat this block as available for both 
demand 1 and demand 2.
However, if a block has demand 2, we should treat this block as available 
for demand 2 only.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (vector_insn_info::operator>=): Fix 
available condition.

---
 gcc/config/riscv/riscv-vsetvl.cc | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 6dbaea32b03..52f0195980a 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1048,12 +1048,10 @@ vector_insn_info::operator>= (const vector_insn_info 
) const
}
 }
 
-  if (demand_p (DEMAND_TAIL_POLICY) && !other.demand_p (DEMAND_TAIL_POLICY)
-  && get_ta () != other.get_ta ())
+  if (!demand_p (DEMAND_TAIL_POLICY) && other.demand_p (DEMAND_TAIL_POLICY))
 return false;
 
-  if (demand_p (DEMAND_MASK_POLICY) && !other.demand_p (DEMAND_MASK_POLICY)
-  && get_ma () != other.get_ma ())
+  if (!demand_p (DEMAND_MASK_POLICY) && other.demand_p (DEMAND_MASK_POLICY))
 return false;
 
   return true;
-- 
2.36.3



[PATCH] RISC-V: Simplify codes of changing vsetvl instruction

2023-01-02 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch is an NFC (no functional change) patch. I moved this code into a
function since we will reuse it in the following patch (Refine phase 3 of VSETVL PASS).

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (change_vsetvl_insn): New function.
(pass_vsetvl::compute_global_backward_infos): Simplify codes.

---
 gcc/config/riscv/riscv-vsetvl.cc | 36 ++--
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index fe76bea297e..6dbaea32b03 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -880,6 +880,25 @@ change_insn (function_info *ssa, insn_change change, 
insn_info *insn,
   return true;
 }
 
+static void
+change_vsetvl_insn (const insn_info *insn, const vector_insn_info )
+{
+  rtx_insn *rinsn;
+  if (vector_config_insn_p (insn->rtl ()))
+{
+  rinsn = insn->rtl ();
+  gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
+}
+  else
+{
+  gcc_assert (has_vtype_op (insn->rtl ()));
+  rinsn = PREV_INSN (insn->rtl ());
+  gcc_assert (vector_config_insn_p (rinsn));
+}
+  rtx new_pat = gen_vsetvl_pat (rinsn, info);
+  change_insn (rinsn, new_pat);
+}
+
 avl_info::avl_info (const avl_info )
 {
   m_value = other.get_value ();
@@ -1941,7 +1960,6 @@ pass_vsetvl::compute_global_backward_infos (void)
   /* Backward propagate to each predecessor.  */
   FOR_EACH_EDGE (e, ei, cfg_bb->preds)
{
- rtx new_pat;
  auto _info
= m_vector_manager->vector_block_infos[e->src->index];
 
@@ -2011,21 +2029,7 @@ pass_vsetvl::compute_global_backward_infos (void)
be_merged = block_info.local_dem;
  vector_insn_info new_info = be_merged.merge (prop, true);
 
- rtx_insn *rinsn;
- if (vector_config_insn_p (new_info.get_insn ()->rtl ()))
-   {
- rinsn = new_info.get_insn ()->rtl ();
- gcc_assert (vsetvl_insn_p (rinsn)
- && "Can't handle X0, rs1 vsetvli yet");
-   }
- else
-   {
- gcc_assert (has_vtype_op (new_info.get_insn ()->rtl ()));
- rinsn = PREV_INSN (new_info.get_insn ()->rtl ());
- gcc_assert (vector_config_insn_p (rinsn));
-   }
- new_pat = gen_vsetvl_pat (rinsn, new_info);
- change_insn (rinsn, new_pat);
+ change_vsetvl_insn (new_info.get_insn (), new_info);
  if (block_info.local_dem == block_info.reaching_out)
block_info.local_dem = new_info;
  block_info.reaching_out = new_info;
-- 
2.36.3



[PATCH] RISC-V: Fix backward_propagate_worthwhile_p

2023-01-02 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (loop_basic_block_p): Adjust function.
(backward_propagate_worthwhile_p): Fix non-worthwhile.

---
 gcc/config/riscv/riscv-vsetvl.cc | 91 +---
 1 file changed, 71 insertions(+), 20 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index ad0457ed89d..fe76bea297e 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -116,10 +116,27 @@ vlmax_avl_insn_p (rtx_insn *rinsn)
  || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
 }
 
+/* Return true if the block is a loop itself:
+ local_dem
+__
+| |
+   || |
+   || |
+|_|
+ reaching_out
+*/
 static bool
 loop_basic_block_p (const basic_block cfg_bb)
 {
-  return JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb));
+  if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
+{
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
+   if (e->dest->index == cfg_bb->index)
+ return true;
+}
+  return false;
 }
 
 /* Return true if it is an RVV instruction depends on VTYPE global
@@ -271,26 +288,60 @@ backward_propagate_worthwhile_p (const basic_block cfg_bb,
 {
   if (loop_basic_block_p (cfg_bb))
 {
-  if (block_info.local_dem.compatible_p (block_info.reaching_out))
-   return true;
-
-  /* There is a obvious case that is not worthwhile and meaningless
-to propagate the demand information:
- local_dem
-__
-| |
-   || |
-   || |
-|_|
- reaching_out
- Header is incompatible with reaching_out and the block is loop itself,
- we don't backward propagate the local_dem since we can't avoid emit
- vsetvl for the local_dem.  */
-  edge e;
-  edge_iterator ei;
-  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
-   if (e->dest->index == cfg_bb->index)
+  if (block_info.reaching_out.valid_or_dirty_p ())
+   {
+ if (block_info.local_dem.compatible_p (block_info.reaching_out))
+   {
+ /* Case 1 (Can backward propagate):
+
+bb0:
+...
+for (int i = 0; i < n; i++)
+  {
+vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
+__riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
+  }
+The local_dem is compatible with reaching_out. Such case is
+worthwhile backward propagation.  */
+ return true;
+   }
+ else
+   {
+ /* Case 2 (Don't backward propagate):
+   
+   bb0:
+   ...
+   for (int i = 0; i < n; i++)
+ {
+   vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
+   __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
+   vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8);
+   __riscv_vse16_v_i16mf2 (out + i + 6, v, 8);
+ }
+The local_dem is incompatible with reaching_out.
+It makes no sense to backward propagate the local_dem since we
+can't avoid VSETVL inside the loop.  */
+ return false;
+   }
+   }
+  else
+   {
+ gcc_assert (block_info.reaching_out.unknown_p ());
+ /* Case 3 (Don't backward propagate):
+   
+   bb0:
+   ...
+   for (int i = 0; i < n; i++)
+ {
+   vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
+   __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
+   fn3 ();
+ }
+   The local_dem is VALID, but the reaching_out is UNKNOWN.
+   It makes no sense to backward propagate the local_dem since we
+   can't avoid VSETVL inside the loop.  */
  return false;
+   }
 }
 
   return true;
-- 
2.36.3



[PATCH] RISC-V: Fix wrong in_group flag in validate_change call function

2023-01-02 Thread juzhe . zhong
From: Ju-Zhe Zhong 

Since we only change insns that are not in a group, the in_group flag that is 
currently passed is not correct.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (change_insn): Adjust in_group in 
validate_change.

---
 gcc/config/riscv/riscv-vsetvl.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 1afe76304fb..ad0457ed89d 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -787,7 +787,7 @@ change_insn (rtx_insn *rinsn, rtx new_pat)
   print_rtl_single (dump_file, PATTERN (rinsn));
 }
 
-  validate_change (rinsn,  (rinsn), new_pat, true);
+  validate_change (rinsn,  (rinsn), new_pat, false);
 
   if (dump_file)
 {
-- 
2.36.3



[PATCH] RISC-V: Fix bugs for refine vsetvl a5, zero into vsetvl zero, zero incorrectly

2023-01-02 Thread juzhe . zhong
From: Ju-Zhe Zhong 

Currently we support this optimization:

bb 0:
 vsetvli a5,zero,e32,mf2
bb 1:
 vsetvli a5,zero,e64,m1 --> vsetvli zero,zero,e64,m1

According to the RVV ISA, we can do this optimization only if both RATIO and 
AVL are equal.
However, the current VSETVL PASS misses the check of AVL. This patch adds this 
condition
check to fix the bug.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (vector_infos_manager::all_same_avl_p): 
New function.
(pass_vsetvl::can_refine_vsetvl_p): Add AVL check.
(pass_vsetvl::commit_vsetvls): Ditto.
* config/riscv/riscv-vsetvl.h: New function declaration.
 
---
 gcc/config/riscv/riscv-vsetvl.cc | 35 
 gcc/config/riscv/riscv-vsetvl.h  |  3 +++
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index ce1e9e3609f..1afe76304fb 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1440,6 +1440,29 @@ vector_infos_manager::all_same_ratio_p (sbitmap bitdata) 
const
   return true;
 }
 
+bool
+vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
+ sbitmap bitdata) const
+{
+  if (bitmap_empty_p (bitdata))
+return false;
+
+  const auto _info = vector_block_infos[cfg_bb->index];
+  if (!block_info.local_dem.demand_p (DEMAND_AVL))
+return true;
+
+  avl_info avl = block_info.local_dem.get_avl_info ();
+  unsigned int bb_index;
+  sbitmap_iterator sbi;
+
+  EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
+  {
+if (vector_exprs[bb_index]->get_avl_info () != avl)
+  return false;
+  }
+  return true;
+}
+
 size_t
 vector_infos_manager::expr_set_num (sbitmap bitdata) const
 {
@@ -2113,6 +2136,10 @@ pass_vsetvl::can_refine_vsetvl_p (const basic_block 
cfg_bb, uint8_t ratio) const
m_vector_manager->vector_avin[cfg_bb->index]))
 return false;
 
+  if (!m_vector_manager->all_same_avl_p (
+   cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
+return false;
+
   size_t expr_id
 = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
   if (m_vector_manager->vector_exprs[expr_id]->get_ratio () != ratio)
@@ -2227,11 +2254,11 @@ pass_vsetvl::commit_vsetvls (void)
 
  insn_info *insn = require->get_insn ();
  vector_insn_info prev_info = vector_insn_info ();
- if (m_vector_manager->all_same_ratio_p (
-   m_vector_manager->vector_avout[eg->src->index]))
+ sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
+ if (m_vector_manager->all_same_ratio_p (bitdata)
+ && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
{
- size_t first = bitmap_first_set_bit (
-   m_vector_manager->vector_avout[eg->src->index]);
+ size_t first = bitmap_first_set_bit (bitdata);
  prev_info = *m_vector_manager->vector_exprs[first];
}
 
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index 6f27004fab1..c8218a6ff00 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -333,6 +333,9 @@ public:
   /* Get all relaxer expression id for corresponding vector info.  */
   auto_vec get_all_available_exprs (const vector_insn_info &) const;
 
+  /* Return true if all expression set in bitmap are same AVL.  */
+  bool all_same_avl_p (const basic_block, sbitmap) const;
+
   /* Return true if all expression set in bitmap are same ratio.  */
   bool all_same_ratio_p (sbitmap) const;
 
-- 
2.36.3



Re: [PATCH] loading float member of parameter stored via int registers

2023-01-02 Thread Jiufu Guo via Gcc-patches


Hi,

Andrew Pinski  writes:

> On Thu, Dec 29, 2022 at 11:45 PM Segher Boessenkool
>  wrote:
>>
>> Hi!
>>
>> On Fri, Dec 30, 2022 at 10:22:31AM +0800, Jiufu Guo wrote:
>> > Considering the limitations of CSE, I try to find other places
>> > to handle this issue, and notice DSE can optimize below code:
>> > "[sfp:DI]=x:DI ; y:SI=[sfp:DI]" to "y:SI=x:DI#0".
>> >
>> > So, I drafted a patch to update DSE to handle DI->DF/SF.
>> > The patch updates "extract_low_bits" to get mode change
>> > with subreg.
>> >
>> > diff --git a/gcc/expmed.cc b/gcc/expmed.cc
>> > index b12b0e000c2..5e36331082c 100644
>> > --- a/gcc/expmed.cc
>> > +++ b/gcc/expmed.cc
>> > @@ -2439,7 +2439,10 @@ extract_low_bits (machine_mode mode, machine_mode 
>> > src_mode, rtx src)
>> >
>> >if (!targetm.modes_tieable_p (src_int_mode, src_mode))
>> >  return NULL_RTX;
>> > -  if (!targetm.modes_tieable_p (int_mode, mode))
>> > +  if (!targetm.modes_tieable_p (int_mode, mode)
>> > +  && !(known_le (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
>> > +&& GET_MODE_CLASS (mode) == MODE_FLOAT
>> > +&& GET_MODE_CLASS (src_mode) == MODE_INT))
>> >  return NULL_RTX;
>> >
>> >src = gen_lowpart (src_int_mode, src);
>>
>> Ah!  This simply shows rs6000_modes_tieable_p is decidedly non-optimal:
>> it does not allow tying a scalar float to anything else.  No such thing
>> is required, or good apparently.  I wonder why we have such restrictions
>> at all in rs6000; is it just unfortunate history, was it good at one
>> point in time?
>
> The documentation for TARGET_MODES_TIEABLE_P says the following:
> If TARGET_HARD_REGNO_MODE_OK (r, mode1) and TARGET_HARD_REGNO_MODE_OK
> (r, mode2) are always the same for any r, then TARGET_MODES_TIEABLE_P
> (mode1, mode2) should be true. If they differ for any r, you should
> define this hook to return false unless some other mechanism ensures
> the accessibility of the value in a narrower mode.
>
> even though rs6000_hard_regno_mode_ok_uncached's comment has the following:
>   /* The float registers (except for VSX vector modes) can only hold floating
>  modes and DImode.  */
>
> TARGET_P8_VECTOR and TARGET_P9_VECTOR has special cased different modes now:
>   if (TARGET_P8_VECTOR && (mode == SImode))
> return 1;
>
>   if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
> return 1;
> Which I suspect that means rs6000_modes_tieable_p should return true
> for SImode and SFmode if TARGET_P8_VECTOR is true. Likewise for
> TARGET_P9_VECTOR and SFmode and QImode/HImode too.
>
Thanks for your great comments!

modes_tieable_p is invoked from a few places besides extract_low_bits, so
updating this hook to relax the restriction may benefit more passes.

We may update modes_tieable_p to cover as many cases as possible.
A hacked patch for "float vs. int" is listed at the end of this mail.

Coming back to the issue of this PR: optimizing a float load of a value that
was stored from an int register.  DSE works mostly within a basic block, so
updating modes_tieable_p (or extract_low_bits) cannot handle some cases like:

double __attribute__ ((noipa)) foo_df (DF arg, int flag)
{
  if (flag == 2)
 return arg.a[3];
  return 0.0;
}

I'm thinking about a way to handle this case.


BR,
Jeff (Jiufu)

>
> Thanks,
> Andrew Pinski
>
>>
>>
>> Segher

(To be refined.)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b3a609f3aa3..8088a608be6 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1959,6 +1959,17 @@ rs6000_hard_regno_mode_ok (unsigned int regno, 
machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
+  
+  if ((GET_MODE_CLASS (mode1) == MODE_FLOAT
+   && (GET_MODE_SIZE (mode2) == UNITS_PER_FP_WORD
+  || (TARGET_P8_VECTOR && (mode2 == SImode))
+  || (TARGET_P9_VECTOR && (mode2 == QImode || mode2 == HImode
+  || (GET_MODE_CLASS (mode2) == MODE_FLOAT
+ && (GET_MODE_SIZE (mode1) == UNITS_PER_FP_WORD
+ || (TARGET_P8_VECTOR && (mode1 == SImode))
+ || (TARGET_P9_VECTOR && (mode1 == QImode || mode1 == HImode)
+return true;
+
   if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
   || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
 return mode1 == mode2;
---


[PATCH] RISC-V: Fix vsetivli instruction asm for IMM AVL

2023-01-02 Thread juzhe . zhong
From: Ju-Zhe Zhong 

Notice that we should use vsetivli zero,4 instead of vsetvli zero,4
for an IMM AVL (0 ~ 31) according to the RVV ISA.

This patch fixes the vsetivli instruction asm bug.

gcc/ChangeLog:

* config/riscv/vector.md:

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vle-constraint-1.c:

---
 gcc/config/riscv/vector.md   |  2 +-
 .../gcc.target/riscv/rvv/base/vle-constraint-1.c | 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3d0174f98a2..4e93b7fead5 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -561,7 +561,7 @@
(match_operand 3 "const_int_operand" "i")
(match_operand 4 "const_int_operand" "i")] UNSPEC_VSETVL))]
   "TARGET_VECTOR"
-  "vsetvli\tzero,%0,e%1,%m2,t%p3,m%p4"
+  "vset%i0vli\tzero,%0,e%1,%m2,t%p3,m%p4"
   [(set_attr "type" "vsetvl")
(set_attr "mode" "")])
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vle-constraint-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-constraint-1.c
index b7cf98bfd9f..8d01e2082f8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/vle-constraint-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-constraint-1.c
@@ -6,7 +6,7 @@
 
 /*
 ** f1:
-** vsetvli\tzero,4,e32,m1,tu,ma
+** vsetivli\tzero,4,e32,m1,tu,ma
 ** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
 ** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
 ** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
@@ -23,7 +23,7 @@ void f1 (float * in, float *out)
 ** f2:
 ** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
 ** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
-** vsetvli\tzero,4,e32,m1,ta,ma
+** vsetivli\tzero,4,e32,m1,ta,ma
 ** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
 ** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ret
@@ -41,7 +41,7 @@ void f2 (float * in, float *out)
 ** f3:
 ** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
 ** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
-** vsetvli\tzero,4,e32,m1,tu,mu
+** vsetivli\tzero,4,e32,m1,tu,mu
 ** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
 ** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
 ** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
@@ -58,7 +58,7 @@ void f3 (float * in, float *out)
 
 /*
 ** f4:
-** vsetvli\tzero,4,e8,mf8,tu,ma
+** vsetivli\tzero,4,e8,mf8,tu,ma
 ** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
 ** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
 ** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
@@ -75,7 +75,7 @@ void f4 (int8_t * in, int8_t *out)
 ** f5:
 ** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
 ** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
-** vsetvli\tzero,4,e8,mf8,ta,ma
+** vsetivli\tzero,4,e8,mf8,ta,ma
 ** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
 ** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ret
@@ -93,7 +93,7 @@ void f5 (int8_t * in, int8_t *out)
 ** f6:
 ** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
 ** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
-** vsetvli\tzero,4,e8,mf8,tu,mu
+** vsetivli\tzero,4,e8,mf8,tu,mu
 ** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
 ** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
 ** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
-- 
2.36.3



Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-02 Thread Jeff Law via Gcc-patches




On 1/2/23 10:30, ro...@nextmovesoftware.com wrote:


Hi Jeff,


On 2 Jan 2023, at 15:45, Jeff Law  wrote:
On 1/1/23 08:55, Roger Sayle wrote:

In 2011, the rtl.texi documentation was updated to reflect that the
modes of the RTX unary operations FFS, POPCOUNT and PARITY must
match those of their operands.  Unfortunately, some of the transformations
in simplify-rtx.cc predate this tightening of RTL semantics, and have
not (until now) been updated/fixed.  i.e. The POPCOUNT and PARITY
optimizations were "correct" when I originally added them back in 2007.
Segher requested that I split this piece out from a fix for PR 106594 in
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601501.html
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?
2023-01-01  Roger Sayle  
gcc/ChangeLog
* gcc/simplify-rtx.cc (simplify_unary_operation_1) :
Avoid generating FFS with mismatched operand and result modes, by
using an explicit SIGN_EXTEND/ZERO_EXTEND instead.
: Likewise, for POPCOUNT of ZERO_EXTEND.
: Likewise, for PARITY of {ZERO,SIGN}_EXTEND.

?!?  The docs still seem to indicate to me that the modes of the input and 
output operands can differ.
Let's take PARITY as an example:


@cindex @code{parity@var{m}2} instruction pattern
@item @samp{parity@var{m}2}
Store into operand 0 the parity of operand 1, i.e.@: the number of 1-bits
in operand 1 modulo 2.
@var{m} is either a scalar or vector integer mode.  When it is a scalar,
operand 1 has mode @var{m} but operand 0 can have whatever scalar
integer mode is suitable for the target.


The mode of the pattern name has to match the mode of the input operand.  The 
mode of the
output operand can differ from the mode of the input operand.  we seem to have 
a disagreement
on the documented semantics of these opcodes.


The documentation that you're looking at is the definition of the parity optab 
in
md.texi, not the definition of the PARITY rtx in rtl.texi.  The distinction is 
subtle.
Hence a backend can define paritysiqi2 but in the RTL pattern it expands to the
unary PARITY operator must have the same result type as its operand type,
wrapped in either a truncate or extension if necessary.
Hmm, yea I was looking at md.texi, not rtl.texi.  I'll take a look at 
the latter after I get some coffee.


Thanks!
jeff


Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-02 Thread Jeff Law via Gcc-patches




On 1/2/23 10:22, Jakub Jelinek wrote:

On Mon, Jan 02, 2023 at 09:20:33AM -0700, Jeff Law wrote:

In fact Raphael and I were about to submit a patch which takes advantage of
that capability to improve the code slightly for risc-v.


Just use a pattern with zero_extend or sign_extend around it or subreg of
it?

If it were only that easy ;(  In the bowels of the simplifications the
zero_extension turns into either a pair of shifts or an AND with a mask (I
forget which offhand).  I'm sure we *can* work around this in the target,
but it'll be ugly.

The documentation definitely needs to be updated.  I looked at the whole
family a few weeks ago and my recollection was they all need to be fixed
(ffs, clrsb, clz, ctz, popcount & parity) if the defined semantics are that
the input and output operand modes must match.


When I look at the documentation of all the above, all of them have
"The mode of @var{x} must be @var{m} or @code{VOIDmode}."

M is the mode on the insn (ie clzsi2).  I don't see a reference to
 ^^
@var{x}.  Weird.

jeff


Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-02 Thread ro...@nextmovesoftware.com


Hi Jeff,

> On 2 Jan 2023, at 15:45, Jeff Law  wrote:
> On 1/1/23 08:55, Roger Sayle wrote:
>> In 2011, the rtl.texi documentation was updated to reflect that the
>> modes of the RTX unary operations FFS, POPCOUNT and PARITY must
>> match those of their operands.  Unfortunately, some of the transformations
>> in simplify-rtx.cc predate this tightening of RTL semantics, and have
>> not (until now) been updated/fixed.  i.e. The POPCOUNT and PARITY
>> optimizations were "correct" when I originally added them back in 2007.
>> Segher requested that I split this piece out from a fix for PR 106594 in
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601501.html
>> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
>> and make -k check, both with and without --target_board=unix{-m32},
>> with no new failures.  Ok for mainline?
>> 2023-01-01  Roger Sayle  
>> gcc/ChangeLog
>>  * gcc/simplify-rtx.cc (simplify_unary_operation_1) :
>>  Avoid generating FFS with mismatched operand and result modes, by
>>  using an explicit SIGN_EXTEND/ZERO_EXTEND instead.
>>  : Likewise, for POPCOUNT of ZERO_EXTEND.
>>  : Likewise, for PARITY of {ZERO,SIGN}_EXTEND.
> ?!?  The docs still seem to indicate to me that the modes of the input and 
> output operands can differ.
> Let's take PARITY as an example:
> 
>> @cindex @code{parity@var{m}2} instruction pattern
>> @item @samp{parity@var{m}2}
>> Store into operand 0 the parity of operand 1, i.e.@: the number of 1-bits
>> in operand 1 modulo 2.
>> @var{m} is either a scalar or vector integer mode.  When it is a scalar,
>> operand 1 has mode @var{m} but operand 0 can have whatever scalar
>> integer mode is suitable for the target.  
> 
> The mode of the pattern name has to match the mode of the input operand.  The 
> mode of the
> output operand can differ from the mode of the input operand.  we seem to 
> have a disagreement
> on the documented semantics of these opcodes.

The documentation that you're looking at is the definition of the parity optab 
in
md.texi, not the definition of the PARITY rtx in rtl.texi.  The distinction is 
subtle.
Hence a backend can define paritysiqi2 but in the RTL pattern it expands to the
unary PARITY operator must have the same result type as its operand type,
wrapped in either a truncate or extension if necessary.

I hope this helps.

Cheers,
Roger
--



Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-02 Thread Jakub Jelinek via Gcc-patches
On Mon, Jan 02, 2023 at 09:20:33AM -0700, Jeff Law wrote:
> > > In fact Raphael and I were about to submit a patch which takes advantage 
> > > of
> > > that capability to improve the code slightly for risc-v.
> > 
> > Just use a pattern with zero_extend or sign_extend around it or subreg of
> > it?
> If it were only that easy ;(  In the bowels of the simplifications the
> zero_extension turns into either a pair of shifts or an AND with a mask (I
> forget which offhand).  I'm sure we *can* work around this in the target,
> but it'll be ugly.
> 
> The documentation definitely needs to be updated.  I looked at the whole
> family a few weeks ago and my recollection was they all need to be fixed
> (ffs, clrsb, clz, ctz, popcount & parity) if the defined semantics are that
> the input and output operand modes must match.

When I look at the documentation of all the above, all of them have
"The mode of @var{x} must be @var{m} or @code{VOIDmode}."

Jakub



Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-02 Thread Jeff Law via Gcc-patches




On 1/2/23 08:59, Jakub Jelinek wrote:

On Mon, Jan 02, 2023 at 08:45:15AM -0700, Jeff Law via Gcc-patches wrote:

On 1/1/23 08:55, Roger Sayle wrote:

In 2011, the rtl.texi documentation was updated to reflect that the
modes of the RTX unary operations FFS, POPCOUNT and PARITY must
match those of their operands.  Unfortunately, some of the transformations
in simplify-rtx.cc predate this tightening of RTL semantics, and have
not (until now) been updated/fixed.  i.e. The POPCOUNT and PARITY
optimizations were "correct" when I originally added them back in 2007.

Segher requested that I split this piece out from a fix for PR 106594 in
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601501.html

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2023-01-01  Roger Sayle  

gcc/ChangeLog
* gcc/simplify-rtx.cc (simplify_unary_operation_1) :
Avoid generating FFS with mismatched operand and result modes, by
using an explicit SIGN_EXTEND/ZERO_EXTEND instead.
: Likewise, for POPCOUNT of ZERO_EXTEND.
: Likewise, for PARITY of {ZERO,SIGN}_EXTEND.

?!?  The docs still seem to indicate to me that the modes of the input and
output operands can differ.  Let's take PARITY as an example:


See the PR50161 thread in
https://gcc.gnu.org/legacy-ml/gcc-patches/2011-08/threads.html#01847
The options are to disallow different modes, which is what my patch did
(perhaps not all documentation has been tweaked), or ensure that the operand
of those is never constant.  The latter is much harder and needs to be done
in many places.  While for SUBREG/ZERO_EXTEND/SIGN_EXTEND and to some extent
also FLOAT/UNSIGNED_FLOAT we already try hard not to fold those immediately
(and still find every now and then spots where we don't do that), for the
rarely used unary rtls we certainly don't.
Sigh.  Lack of modes on constants mucking things up elsewhere.  There's 
no good reason other than our poor representation to force the input and 
output modes to match for these instructions.






In fact Raphael and I were about to submit a patch which takes advantage of
that capability to improve the code slightly for risc-v.


Just use a pattern with zero_extend or sign_extend around it or subreg of
it?
If it were only that easy ;(  In the bowels of the simplifications the 
zero_extension turns into either a pair of shifts or an AND with a mask 
(I forget which offhand).  I'm sure we *can* work around this in the 
target, but it'll be ugly.


The documentation definitely needs to be updated.  I looked at the whole 
family a few weeks ago and my recollection was they all need to be fixed 
(ffs, clrsb, clz, ctz, popcount & parity) if the defined semantics are 
that the input and output operand modes must match.


Jeff


Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-02 Thread Jakub Jelinek via Gcc-patches
On Mon, Jan 02, 2023 at 08:45:15AM -0700, Jeff Law via Gcc-patches wrote:
> On 1/1/23 08:55, Roger Sayle wrote:
> > In 2011, the rtl.texi documentation was updated to reflect that the
> > modes of the RTX unary operations FFS, POPCOUNT and PARITY must
> > match those of their operands.  Unfortunately, some of the transformations
> > in simplify-rtx.cc predate this tightening of RTL semantics, and have
> > not (until now) been updated/fixed.  i.e. The POPCOUNT and PARITY
> > optimizations were "correct" when I originally added them back in 2007.
> > 
> > Segher requested that I split this piece out from a fix for PR 106594 in
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601501.html
> > 
> > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> > and make -k check, both with and without --target_board=unix{-m32},
> > with no new failures.  Ok for mainline?
> > 
> > 
> > 2023-01-01  Roger Sayle  
> > 
> > gcc/ChangeLog
> > * gcc/simplify-rtx.cc (simplify_unary_operation_1) :
> > Avoid generating FFS with mismatched operand and result modes, by
> > using an explicit SIGN_EXTEND/ZERO_EXTEND instead.
> > : Likewise, for POPCOUNT of ZERO_EXTEND.
> > : Likewise, for PARITY of {ZERO,SIGN}_EXTEND.
> ?!?  The docs still seem to indicate to me that the modes of the input and
> output operands can differ.  Let's take PARITY as an example:

See the PR50161 thread in
https://gcc.gnu.org/legacy-ml/gcc-patches/2011-08/threads.html#01847
The options are to disallow different modes, which is what my patch did
(perhaps not all documentation has been tweaked), or ensure that the operand
of those is never constant.  The latter is much harder and needs to be done
in many places.  While for SUBREG/ZERO_EXTEND/SIGN_EXTEND and to some extent
also FLOAT/UNSIGNED_FLOAT we already try hard not to fold those immediately
(and still find every now and then spots where we don't do that), for the
rarely used unary rtls we certainly don't.

> In fact Raphael and I were about to submit a patch which takes advantage of
> that capability to improve the code slightly for risc-v.

Just use a pattern with zero_extend or sign_extend around it or subreg of
it?

Jakub



Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-02 Thread Jeff Law via Gcc-patches




On 1/1/23 08:55, Roger Sayle wrote:

In 2011, the rtl.texi documentation was updated to reflect that the
modes of the RTX unary operations FFS, POPCOUNT and PARITY must
match those of their operands.  Unfortunately, some of the transformations
in simplify-rtx.cc predate this tightening of RTL semantics, and have
not (until now) been updated/fixed.  i.e. The POPCOUNT and PARITY
optimizations were "correct" when I originally added them back in 2007.

Segher requested that I split this piece out from a fix for PR 106594 in
https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601501.html

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2023-01-01  Roger Sayle  

gcc/ChangeLog
* gcc/simplify-rtx.cc (simplify_unary_operation_1) <case FFS>:
Avoid generating FFS with mismatched operand and result modes, by
using an explicit SIGN_EXTEND/ZERO_EXTEND instead.
<case POPCOUNT>: Likewise, for POPCOUNT of ZERO_EXTEND.
<case PARITY>: Likewise, for PARITY of {ZERO,SIGN}_EXTEND.
?!?  The docs still seem to indicate to me that the modes of the input 
and output operands can differ.  Let's take PARITY as an example:



@cindex @code{parity@var{m}2} instruction pattern
@item @samp{parity@var{m}2}
Store into operand 0 the parity of operand 1, i.e.@: the number of 1-bits
in operand 1 modulo 2.

@var{m} is either a scalar or vector integer mode.  When it is a scalar,
operand 1 has mode @var{m} but operand 0 can have whatever scalar
integer mode is suitable for the target.  


The mode of the pattern name has to match the mode of the input operand. 
 The mode of the output operand can differ from the mode of the input 
operand.  We seem to have a disagreement on the documented semantics of 
these opcodes.


In fact Raphael and I were about to submit a patch which takes advantage 
of that capability to improve the code slightly for risc-v.


Jeff


Re: [PATCH 1/3] Compute a table of DWARF register sizes at compile

2023-01-02 Thread Florian Weimer via Gcc-patches
* Jeff Law:

> On 11/8/22 11:05, Florian Weimer via Gcc-patches wrote:
>> The sizes are compile-time constants.  Create a vector with them,
>> so that they can be inspected at compile time.
>>
>>  * gcc/dwarf2cfi.cc (init_return_column_size): Remove.
>>  (init_one_dwarf_reg_size): Adjust.
>>  (generate_dwarf_reg_sizes): New function.  Extracted
>>  from expand_builtin_init_dwarf_reg_sizes.
>>  (expand_builtin_init_dwarf_reg_sizes): Call
>>  generate_dwarf_reg_sizes.
>>  * gcc/target.def (init_dwarf_reg_sizes_extra): Adjust
>>  hook signature.
>>  * gcc/config/msp430/msp430.cc
>>  (msp430_init_dwarf_reg_sizes_extra): Adjust.
>>  * gcc/config/rs6000.cc (rs6000_init_dwarf_reg_sizes_extra):
>>  Likewise.
>>  * gcc/doc/tm.texi: Update.
>
> This series of 3 patches is fine.

Thanks, now pushed (after polishing the ChangeLog snippets).

Florian



Re: [PATCH] gccrs: add selftest-rust-gdb and selftest-rust-valgrind "make" targets

2023-01-02 Thread Arthur Cohen

Hi David,

On 12/16/22 18:01, David Malcolm wrote:

Add "make" targets to make it easy to run the rust selftests under gdb
and under valgrind via:
   make selftest-rust-gdb
and
   make selftest-rust-valgrind
respectively, similar to analogous "make" targets in the C and C++
frontends.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/rust/ChangeLog:
* Make-lang.in (selftest-rust-gdb): New.
(selftest-rust-valgrind): New.

Signed-off-by: David Malcolm 
---
  gcc/rust/Make-lang.in | 12 
  1 file changed, 12 insertions(+)

diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in
index 681ac7b3fee..76015b3426b 100644
--- a/gcc/rust/Make-lang.in
+++ b/gcc/rust/Make-lang.in
@@ -275,6 +275,18 @@ s-selftest-rust: $(RUST_SELFTEST_DEPS)
$(GCC_FOR_TARGET) $(RUST_SELFTEST_FLAGS)
$(STAMP) $@
  
+# Convenience methods for running rust selftests under gdb:

+.PHONY: selftest-rust-gdb
+selftest-rust-gdb: $(RUST_SELFTEST_DEPS)
+   $(GCC_FOR_TARGET) $(RUST_SELFTEST_FLAGS) \
+ -wrapper gdb,--args
+
+# Convenience methods for running rust selftests under valgrind:
+.PHONY: selftest-rust-valgrind
+selftest-rust-valgrind: $(RUST_SELFTEST_DEPS)
+   $(GCC_FOR_TARGET) $(RUST_SELFTEST_FLAGS) \
+ -wrapper valgrind,--leak-check=full
+
  # Install info documentation for the front end, if it is present in the 
source directory. This target
  # should have dependencies on info files that should be installed.
  rust.install-info:


OK for trunk :) Thanks again!

--
Arthur Cohen 

Toolchain Engineer

Embecosm GmbH

Geschäftsführer: Jeremy Bennett
Niederlassung: Nürnberg
Handelsregister: HR-B 36368
www.embecosm.de

Fürther Str. 27
90429 Nürnberg


Tel.: 091 - 128 707 040
Fax: 091 - 128 707 077


OpenPGP_0x1B3465B044AD9C65.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH] gccrs: avoid printing to stderr in selftest::rust_flatten_list

2023-01-02 Thread Arthur Cohen

Hi David,

Sorry for the delayed reply!

On 12/16/22 18:01, David Malcolm wrote:

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/rust/ChangeLog:
* resolve/rust-ast-resolve-item.cc (selftest::rust_flatten_list):
Remove output to stderr.

Signed-off-by: David Malcolm 
---
  gcc/rust/resolve/rust-ast-resolve-item.cc | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/gcc/rust/resolve/rust-ast-resolve-item.cc 
b/gcc/rust/resolve/rust-ast-resolve-item.cc
index 0c38f28d530..1276e845acc 100644
--- a/gcc/rust/resolve/rust-ast-resolve-item.cc
+++ b/gcc/rust/resolve/rust-ast-resolve-item.cc
@@ -1202,9 +1202,6 @@ rust_flatten_list (void)
auto paths = std::vector ();
Rust::Resolver::flatten_list (list, paths);
  
-  for (auto &path : paths)

-fprintf (stderr, "%s\n", path.as_string ().c_str ());
-
ASSERT_TRUE (!paths.empty ());
ASSERT_EQ (paths.size (), 2);
ASSERT_EQ (paths[0].get_segments ()[0].as_string (), "foo");


Looks good to me. OK for trunk :)

Thanks for taking the time!

All the best,

--
Arthur Cohen 

Toolchain Engineer

Embecosm GmbH

Geschäftsführer: Jeremy Bennett
Niederlassung: Nürnberg
Handelsregister: HR-B 36368
www.embecosm.de

Fürther Str. 27
90429 Nürnberg


Tel.: 091 - 128 707 040
Fax: 091 - 128 707 077


OpenPGP_0x1B3465B044AD9C65.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature


[PATCH] modula-2: Fix registration of modules via constructors [PR108183].

2023-01-02 Thread Iain Sandoe via Gcc-patches
 When I first made this patch I had a question as to what should be
 done for registration CTORs generated by the compiler for .mod files.
 I've now answered that question (the code that makes the GCC decl
 has also been updated in a separately posted patch).
 
tested on x86_64-linux-gnu, x86_64, aarch64-darwin21,
OK for master?
Thanks,
Iain
 
 --- 8< ---

This reworks the mechanism used for module registration to use init-
time constructors.  The order of registration is not important, the
actual initialization dependency tree will be computed early in the
execution (all that matters is that we have registered before that).

This fixes a potential issue in which the external name known to the
m2 system is of the form _M2_XX_ctor() but the C++ code was
producing a static variable instance with the same name.

Signed-off-by: Iain Sandoe 

PR modula2/108183

gcc/m2/ChangeLog:

* gm2-libs-ch/UnixArgs.cc (_M2_UnixArgs_ctor): Rework to use
an extern "C" function with 'constructor' attribute.
* gm2-libs-ch/dtoa.cc (_M2_dtoa_ctor): Likewise.
* gm2-libs-ch/ldtoa.cc (_M2_ldtoa_ctor): Likewise.

libgm2/ChangeLog:

* libm2cor/KeyBoardLEDs.cc (_M2_KeyBoardLEDs_ctor): Rework to use
an extern "C" function with 'constructor' attribute.
* libm2iso/ErrnoCategory.cc (_M2_ErrnoCategory_ctor): Likewise.
* libm2iso/RTco.cc (_M2_RTco_ctor): Likewise.
* libm2pim/Selective.cc (_M2_Selective_ctor): Likewise.
* libm2pim/SysExceptions.cc (_M2_SysExceptions_ctor): Likewise.
* libm2pim/UnixArgs.cc (_M2_UnixArgs_ctor): Likewise.
* libm2pim/cgetopt.cc (_M2_cgetopt_ctor): Likewise.
* libm2pim/dtoa.cc (_M2_dtoa_ctor): Likewise.
* libm2pim/errno.cc (_M2_errno_ctor): Likewise.
* libm2pim/ldtoa.cc (_M2_ldtoa_ctor): Likewise.
* libm2pim/sckt.cc (_M2_sckt_ctor): Likewise.
* libm2pim/termios.cc (_M2_termios_ctor): Likewise.
* libm2pim/wrapc.c: Add a new line to the file end.
---
 gcc/m2/gm2-libs-ch/UnixArgs.cc   | 5 ++---
 gcc/m2/gm2-libs-ch/dtoa.cc   | 5 ++---
 gcc/m2/gm2-libs-ch/ldtoa.cc  | 5 ++---
 libgm2/libm2cor/KeyBoardLEDs.cc  | 5 ++---
 libgm2/libm2iso/ErrnoCategory.cc | 5 ++---
 libgm2/libm2iso/RTco.cc  | 5 ++---
 libgm2/libm2pim/Selective.cc | 5 ++---
 libgm2/libm2pim/SysExceptions.cc | 5 ++---
 libgm2/libm2pim/UnixArgs.cc  | 5 ++---
 libgm2/libm2pim/cgetopt.cc   | 5 ++---
 libgm2/libm2pim/dtoa.cc  | 5 ++---
 libgm2/libm2pim/errno.cc | 5 ++---
 libgm2/libm2pim/ldtoa.cc | 5 ++---
 libgm2/libm2pim/sckt.cc  | 5 ++---
 libgm2/libm2pim/termios.cc   | 5 ++---
 libgm2/libm2pim/wrapc.c  | 1 +
 16 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/gcc/m2/gm2-libs-ch/UnixArgs.cc b/gcc/m2/gm2-libs-ch/UnixArgs.cc
index 1180f351b24..4bb3769644c 100644
--- a/gcc/m2/gm2-libs-ch/UnixArgs.cc
+++ b/gcc/m2/gm2-libs-ch/UnixArgs.cc
@@ -82,9 +82,8 @@ _M2_UnixArgs_dep (void)
 {
 }
 
-struct _M2_UnixArgs_ctor { _M2_UnixArgs_ctor (); } _M2_UnixArgs_ctor;
-
-_M2_UnixArgs_ctor::_M2_UnixArgs_ctor (void)
+extern "C" void __attribute__((__constructor__))
+_M2_UnixArgs_ctor (void)
 {
   M2RTS_RegisterModule ("UnixArgs", _M2_UnixArgs_init, _M2_UnixArgs_finish,
_M2_UnixArgs_dep);
diff --git a/gcc/m2/gm2-libs-ch/dtoa.cc b/gcc/m2/gm2-libs-ch/dtoa.cc
index 57317588ba1..111fc4b67dc 100644
--- a/gcc/m2/gm2-libs-ch/dtoa.cc
+++ b/gcc/m2/gm2-libs-ch/dtoa.cc
@@ -189,9 +189,8 @@ _M2_dtoa_dep (void)
 #ifdef __cplusplus
 }
 
-struct _M2_dtoa_ctor { _M2_dtoa_ctor (); } _M2_dtoa_ctor;
-
-_M2_dtoa_ctor::_M2_dtoa_ctor (void)
+extern "C" void __attribute__((__constructor__))
+_M2_dtoa_ctor (void)
 {
   M2RTS_RegisterModule ("dtoa", _M2_dtoa_init, _M2_dtoa_finish,
_M2_dtoa_dep);
diff --git a/gcc/m2/gm2-libs-ch/ldtoa.cc b/gcc/m2/gm2-libs-ch/ldtoa.cc
index ac14297ec24..8d9e648fddf 100644
--- a/gcc/m2/gm2-libs-ch/ldtoa.cc
+++ b/gcc/m2/gm2-libs-ch/ldtoa.cc
@@ -118,9 +118,8 @@ _M2_ldtoa_dep (void)
 #   ifdef __cplusplus
 }
 
-struct _M2_ldtoa_ctor { _M2_ldtoa_ctor (); } _M2_ldtoa_ctor;
-
-_M2_ldtoa_ctor::_M2_ldtoa_ctor (void)
+extern "C" void __attribute__((__constructor__))
+_M2_ldtoa_ctor (void)
 {
   M2RTS_RegisterModule ("ldtoa", _M2_ldtoa_init, _M2_ldtoa_finish,
_M2_ldtoa_dep);
diff --git a/libgm2/libm2cor/KeyBoardLEDs.cc b/libgm2/libm2cor/KeyBoardLEDs.cc
index b640df67adc..bd0a4506a93 100644
--- a/libgm2/libm2cor/KeyBoardLEDs.cc
+++ b/libgm2/libm2cor/KeyBoardLEDs.cc
@@ -148,9 +148,8 @@ _M2_KeyBoardLEDs_dep (void)
 {
 }
 
-struct _M2_KeyBoardLEDs_ctor { _M2_KeyBoardLEDs_ctor (); } 
_M2_KeyBoardLEDs_ctor;
-
-_M2_KeyBoardLEDs_ctor::_M2_KeyBoardLEDs_ctor (void)
+extern "C" void __attribute__((__constructor__))
+ _M2_KeyBoardLEDs_ctor (void)
 {
   M2RTS_RegisterModule ("KeyBoardLEDs", _M2_KeyBoardLEDs_init, 
_M2_KeyBoardLEDs_finish,
_M2_KeyBoardLEDs_dep);
diff 

[PATCH] modula-2: Module registration constructors need to be visible [PR108259].

2023-01-02 Thread Iain Sandoe via Gcc-patches
Tested on x86_64-linux-gnu, x86_64,aarch64-darwin21.
There remain issues with shared libraries, but the link fails are fixed
by this.

OK for master?
Thanks
Iain

--- 8< ---

In the current design the main executable links explicitly to the module
registration constructors that it uses.  This means that they must be
visible in shared libraries.

PR modula2/108259

gcc/m2/ChangeLog:

* gm2-gcc/m2decl.cc (m2decl_DeclareModuleCtor): Make module
registration constructors visible.
---
 gcc/m2/gm2-gcc/m2decl.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/m2/gm2-gcc/m2decl.cc b/gcc/m2/gm2-gcc/m2decl.cc
index 62bfefd2530..d849f8aefc4 100644
--- a/gcc/m2/gm2-gcc/m2decl.cc
+++ b/gcc/m2/gm2-gcc/m2decl.cc
@@ -276,7 +276,7 @@ m2decl_DeclareModuleCtor (tree decl)
   /* Declare module_ctor ().  */
   TREE_PUBLIC (decl) = 1;
   DECL_ARTIFICIAL (decl) = 1;
-  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
+  DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
   DECL_VISIBILITY_SPECIFIED (decl) = 1;
   DECL_STATIC_CONSTRUCTOR (decl) = 1;
   return decl;
-- 
2.37.1 (Apple Git-137.1)



Re: Ping^3: [PATCH] jit: Install jit headers in $(libsubincludedir) [PR 101491]

2023-01-02 Thread Lorenzo Salvadore via Gcc-patches
Hello,

Ping https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606450.html

Thanks,

Lorenzo Salvadore

> From f8e2c2ee89a7d8741bb65163d1f1c20edcd546ac Mon Sep 17 00:00:00 2001
> From: Lorenzo Salvadore develo...@lorenzosalvadore.it
> 
> Date: Wed, 16 Nov 2022 11:27:38 +0100
> Subject: [PATCH] jit: Install jit headers in $(libsubincludedir) [PR 101491]
> 
> Installing jit/libgccjit.h and jit/libgccjit++.h headers in
> $(includedir) can be a problem for machines where multiple versions of
> GCC are required simultaneously, see for example this bug report on
> FreeBSD:
> 
> https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=257060
> 
> Hence,
> 
> - define $(libsubincludedir) the same way it is defined in libgomp;
> - install jit/libgccjit.h and jit/libgccjit++.h in $(libsubincludedir).
> 
> The patch has already been applied successfully in the official FreeBSD
> ports tree for the ports lang/gcc11 and lang/gcc12. Please see the
> following commits:
> 
> https://cgit.freebsd.org/ports/commit/?id=0338e04504ee269b7a95e6707f1314bc1c4239fe
> https://cgit.freebsd.org/ports/commit/?id=f1957296ed2dce8a09bb9582e9a5a715bf8b3d4d
> 
> gcc/ChangeLog:
> 
> 2022-11-16 Lorenzo Salvadore develo...@lorenzosalvadore.it
> 
> PR jit/101491
> * Makefile.in: Define and create $(libsubincludedir)
> 
> gcc/jit/ChangeLog:
> 
> 2022-11-16 Lorenzo Salvadore develo...@lorenzosalvadore.it
> 
> PR jit/101491
> * Make-lang.in: Install headers in $(libsubincludedir)
> ---
> gcc/Makefile.in | 3 +++
> gcc/jit/Make-lang.in | 4 ++--
> 2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index f672e6ea549..3bcf1c491ab 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -635,6 +635,8 @@ libexecdir = @libexecdir@
> 
> # Directory in which the compiler finds libraries etc.
> libsubdir = 
> $(libdir)/gcc/$(real_target_noncanonical)/$(version)$(accel_dir_suffix)
> +# Directory in which the compiler finds headers.
> +libsubincludedir = $(libdir)/gcc/$(target_alias)/$(version)/include
> # Directory in which the compiler finds executables
> libexecsubdir = 
> $(libexecdir)/gcc/$(real_target_noncanonical)/$(version)$(accel_dir_suffix)
> # Directory in which all plugin resources are installed
> @@ -3642,6 +3644,7 @@ install-cpp: installdirs cpp$(exeext)
> # $(libdir)/gcc/include isn't currently searched by cpp.
> installdirs:
> $(mkinstalldirs) $(DESTDIR)$(libsubdir)
> + $(mkinstalldirs) $(DESTDIR)$(libsubincludedir)
> $(mkinstalldirs) $(DESTDIR)$(libexecsubdir)
> $(mkinstalldirs) $(DESTDIR)$(bindir)
> $(mkinstalldirs) $(DESTDIR)$(includedir)
> diff --git a/gcc/jit/Make-lang.in b/gcc/jit/Make-lang.in
> index 248ec45b729..ba1b3e95da5 100644
> --- a/gcc/jit/Make-lang.in
> +++ b/gcc/jit/Make-lang.in
> @@ -360,9 +360,9 @@ selftest-jit:
> # Install hooks:
> jit.install-headers: installdirs
> $(INSTALL_DATA) $(srcdir)/jit/libgccjit.h \
> - $(DESTDIR)$(includedir)/libgccjit.h
> + $(DESTDIR)$(libsubincludedir)/libgccjit.h
> $(INSTALL_DATA) $(srcdir)/jit/libgccjit++.h \
> - $(DESTDIR)$(includedir)/libgccjit++.h
> + $(DESTDIR)$(libsubincludedir)/libgccjit++.h
> 
> ifneq (,$(findstring mingw,$(target)))
> jit.install-common: installdirs jit.install-headers
> --
> 2.38.0


[x86 PATCH] Improve ix86_expand_int_movcc to allow condition (mask) sharing.

2023-01-02 Thread Roger Sayle

This patch modifies the way that ix86_expand_int_movcc generates RTL,
to allow the condition mask to be shared/reused between multiple
conditional move sequences.  Such redundancy is common when RTL
if-conversion transforms non-trivial basic blocks.

As a motivating example, consider the new test case:

int a, b, c, d;
int foo(int x)
{
if (x == 0) {
a = 3;
b = 1;
c = 4;
d = 1;
} else {
a = 5;
b = 9;
c = 2;
d = 7;
}
return x;
}

This is currently compiled, with -O2, to:

foo:cmpl$1, %edi
movl%edi, %eax
sbbl%edi, %edi
andl$-2, %edi
addl$5, %edi
cmpl$1, %eax
sbbl%esi, %esi
movl%edi, a(%rip)
andl$-8, %esi
addl$9, %esi
cmpl$1, %eax
sbbl%ecx, %ecx
movl%esi, b(%rip)
andl$2, %ecx
addl$2, %ecx
cmpl$1, %eax
sbbl%edx, %edx
movl%ecx, c(%rip)
andl$-6, %edx
addl$7, %edx
movl%edx, d(%rip)
ret

Notice that the if-then-else blocks have been if-converted into four
conditional move sequences/assignments, each consisting of cmpl, sbbl,
andl and addl.  However, as the conditions are the same, the cmpl and
sbbl instructions used to generate the mask could be shared by CSE.

This patch enables that, so we now generate:

foo:cmpl$1, %edi
movl%edi, %eax
sbbl%edx, %edx
movl%edx, %edi
movl%edx, %esi
movl%edx, %ecx
andl$-6, %edx
andl$-2, %edi
andl$-8, %esi
andl$2, %ecx
addl$7, %edx
addl$5, %edi
addl$9, %esi
addl$2, %ecx
movl%edx, d(%rip)
movl%edi, a(%rip)
movl%esi, b(%rip)
movl%ecx, c(%rip)
ret

Notice, the code now contains only a single cmpl and a single sbbl,
with their result being shared (via movl).

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2023-01-02  Roger Sayle  

gcc/ChangeLog
* config/i386/i386-expand.cc (ix86_expand_int_movcc): Rewrite
RTL expansion to allow condition (mask) to be shared/reused,
by avoiding overwriting pseudos and adding REG_EQUAL notes.

gcc/testsuite/ChangeLog
* gcc.target/i386/cmov10.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 3eddbc9..4fd7c3c 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3284,8 +3284,8 @@ ix86_expand_int_movcc (rtx operands[])
  || negate_cc_compare_p
  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
{
- /* Detect overlap between destination and compare sources.  */
- rtx tmp = out;
+ /* Place comparison result in its own pseudo.  */
+ rtx tmp = gen_reg_rtx (mode);
 
  if (negate_cc_compare_p)
{
@@ -3295,7 +3295,6 @@ ix86_expand_int_movcc (rtx operands[])
emit_insn (gen_x86_negsi_ccc (gen_reg_rtx (SImode),
  gen_lowpart (SImode, op0)));
 
- tmp = gen_reg_rtx (mode);
  if (mode == DImode)
emit_insn (gen_x86_movdicc_0_m1_neg (tmp));
  else
@@ -3337,9 +3336,6 @@ ix86_expand_int_movcc (rtx operands[])
}
  diff = ct - cf;
 
- if (reg_overlap_mentioned_p (out, compare_op))
-   tmp = gen_reg_rtx (mode);
-
  if (mode == DImode)
emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
  else
@@ -3358,6 +3354,11 @@ ix86_expand_int_movcc (rtx operands[])
  tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
}
 
+ /* Add a REG_EQUAL note to allow condition to be shared.  */
+ rtx note = gen_rtx_fmt_ee (code, mode, op0, op1);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL,
+  gen_rtx_NEG (mode, note));
+
  if (diff == 1)
{
  /*
@@ -3368,9 +3369,8 @@ ix86_expand_int_movcc (rtx operands[])
   * Size 5 - 8.
   */
  if (ct)
-   tmp = expand_simple_binop (mode, PLUS,
-  tmp, GEN_INT (ct),
-  copy_rtx (tmp), 1, OPTAB_DIRECT);
+   tmp = expand_simple_binop (mode, PLUS, tmp, GEN_INT (ct),
+  NULL_RTX, 1, OPTAB_DIRECT);
}
  else if (cf == -1)
{
@@ -3381,9 +3381,8 @@ ix86_expand_int_movcc (rtx operands[])
   *
   * Size 8.
  

Re: [PING] Re: [PATCH 2/2] Corrected pr25521.c target matching.

2023-01-02 Thread Cupertino Miranda via Gcc-patches


PING PING

Cupertino Miranda writes:

> Cupertino Miranda via Gcc-patches writes:
>
>> gentle ping
>>
>> Cupertino Miranda writes:
>>
 On 12/2/22 10:52, Cupertino Miranda via Gcc-patches wrote:
> This commit is a follow up of bugzilla #107181.
> The commit /a0aafbc/ changed the default implementation of the
> SELECT_SECTION hook in order to match clang/llvm behaviour w.r.t the
> placement of `const volatile' objects.
> However, the following targets use target-specific selection functions
> and they choke on the testcase pr25521.c:
>   *rx - target sets its const variables as '.section C,"a",@progbits'.
 That's presumably a constant section.  We should instead twiddle the test 
 to
 recognize that section.
>>>
>>> Although @progbits is indeed a constant section, I believe it is
>>> more interesting to detect if the `rx' starts selecting more
>>> standard sections instead of the current @progbits.
>>> That was the reason why I opted to XFAIL instead of PASSing it.
>>> Can I keep it as such ?
>>>

>   *powerpc - its 32bit version is eager to allocate globals in .sdata
>  sections.
> Normally, one can expect for the variable to be allocated in .srodata,
> however, in case of powerpc-*-* or powerpc64-*-* (with -m32)
> 'targetm.have_srodata_section == false' and the code in
> categorize_decl_for_section(varasm.cc), forces it to allocate in .sdata.
>/* If the target uses small data sections, select it.  */
>else if (targetm.in_small_data_p (decl))
>  {
>if (ret == SECCAT_BSS)
>   ret = SECCAT_SBSS;
>else if (targetm.have_srodata_section && ret == SECCAT_RODATA)
>   ret = SECCAT_SRODATA;
>else
>   ret = SECCAT_SDATA;
>  }
 I'd just skip the test for 32bit ppc.  There should be suitable 
 effective-target
 tests you can use.

 jeff


Re: [PING] Re: [PATCH 1/2] select .rodata for const volatile variables.

2023-01-02 Thread Cupertino Miranda via Gcc-patches


PING PING

Cupertino Miranda writes:

> Cupertino Miranda via Gcc-patches writes:
>
>> gentle ping
>>
>> Cupertino Miranda writes:
>>
>>> Hi Jeff,
>>>
>>> First of all thanks for your quick review.
>>> Apologies for the delay replying, the message got lost in my inbox.
>>>
 On 12/2/22 10:52, Cupertino Miranda via Gcc-patches wrote:
> Changed target code to select .rodata section for 'const volatile'
> defined variables.
> This change is in the context of the bugzilla #107181.
> gcc/ChangeLog:
>   v850.c(v850_select_section): Changed function.
 I'm not sure this is safe/correct.  ISTM that you need to look at the 
 underlying
 TREE_TYPE to check for const-volatile rather than TREE_SIDE_EFFECTS.
>>>
>>> I believe this was asked by Jose when he first sent the generic patches.
>>> Please notice my change is influenced by his original patch that does
>>> the same and was approved.
>>>
>>> https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599348.html
>>> https://gcc.gnu.org/pipermail/gcc-patches/2022-September/602374.html
>>>

 Of secondary importance is the ChangeLog.  Just saying "Changed function"
 provides no real information.  Something like this would be better:

* config/v850/v850.c (v850_select_section): Put const volatile
objects into read-only sections.


 Jeff




> ---
>   gcc/config/v850/v850.cc | 1 -
>   1 file changed, 1 deletion(-)
> diff --git a/gcc/config/v850/v850.cc b/gcc/config/v850/v850.cc
> index c7d432990ab..e66893fede4 100644
> --- a/gcc/config/v850/v850.cc
> +++ b/gcc/config/v850/v850.cc
> @@ -2865,7 +2865,6 @@ v850_select_section (tree exp,
>   {
> int is_const;
> if (!TREE_READONLY (exp)
> -   || TREE_SIDE_EFFECTS (exp)
> || !DECL_INITIAL (exp)
> || (DECL_INITIAL (exp) != error_mark_node
> && !TREE_CONSTANT (DECL_INITIAL (exp


[PATCH] modula-2, driver: Implement handling for -static-libgm2.

2023-01-02 Thread Iain Sandoe via Gcc-patches
tested on x86_64-linux-gnu, x86_64,aarch64-darwin21,
OK for trunk?
thanks,
Iain

--- 8< ---

This was unimplemented so far.

gcc/ChangeLog:

* common.opt: Add -static-libgm2.
* config/darwin.h (LINK_SPEC): Handle static-libgm2.

gcc/m2/ChangeLog:

* gm2spec.cc (lang_specific_driver): Handle static-libgm2.
---
 gcc/common.opt  |  4 
 gcc/config/darwin.h |  7 ++-
 gcc/m2/gm2spec.cc   | 24 +++-
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index b01f7a7a4a2..0f3910cf5f6 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3630,6 +3630,10 @@ static-libgfortran
 Driver
 ; Documented for Fortran, but always accepted by driver.
 
+static-libgm2
+Driver
+; Documented for Modula-2, but always accepted by driver.
+
 static-libphobos
 Driver
 ; Documented for D, but always accepted by driver.
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 0ec882ffb54..a3edd7c922b 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -476,7 +476,12 @@ extern GTY(()) int darwin_ms_struct;
%{static|static-libgcc|static-libphobos:%:replace-outfile(-lgphobos 
libgphobos.a%s)}\

%{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp
 libgomp.a%s)}\
%{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ 
libstdc++.a%s)}\
-   %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
+   %{static|static-libgm2:%:replace-outfile(-lm2pim libm2pim.a%s)}\
+   %{static|static-libgm2:%:replace-outfile(-lm2iso libm2iso.a%s)}\
+   %{static|static-libgm2:%:replace-outfile(-lm2min libm2min.a%s)}\
+   %{static|static-libgm2:%:replace-outfile(-lm2log libm2log.a%s)}\
+   %{static|static-libgm2:%:replace-outfile(-lm2cor libm2cor.a%s)}\
+  %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
%{!force_cpusubtype_ALL:-arch %(darwin_subarch)} "\
LINK_SYSROOT_SPEC \
   "%{mmacosx-version-min=*:-macosx_version_min %*} \
diff --git a/gcc/m2/gm2spec.cc b/gcc/m2/gm2spec.cc
index 4996fa49789..6a17114c9f3 100644
--- a/gcc/m2/gm2spec.cc
+++ b/gcc/m2/gm2spec.cc
@@ -585,6 +585,9 @@ lang_specific_driver (struct cl_decoded_option 
**in_decoded_options,
   /* Should the driver perform a link?  */
   bool linking = true;
 
+  /* Should the driver link the shared gm2 libs?  */
+  bool shared_libgm2 = true;
+
   /* "-lm" or "-lmath" if it appears on the command line.  */
   const struct cl_decoded_option *saw_math = NULL;
 
@@ -594,7 +597,8 @@ lang_specific_driver (struct cl_decoded_option 
**in_decoded_options,
   /* By default, we throw on the math library if we have one.  */
   int need_math = (MATH_LIBRARY[0] != '\0');
 
-  /* 1 if we should add -lpthread to the command-line.  */
+  /* 1 if we should add -lpthread to the command-line.
+FIXME: the default should be a configuration choice.  */
   int need_pthread = 1;
 
   /* True if we saw -static.  */
@@ -774,6 +778,16 @@ lang_specific_driver (struct cl_decoded_option 
**in_decoded_options,
 #endif
  break;
 
+   case OPT_static_libgm2:
+ shared_libgm2 = false;
+#ifdef HAVE_LD_STATIC_DYNAMIC
+ /* Remove -static-libgm2 from the command only if target supports
+LD_STATIC_DYNAMIC.  When not supported, it is left in so that a
+back-end target can use outfile substitution.  */
+ args[i] |= SKIPOPT;
+#endif
+ break;
+
case OPT_stdlib_:
  which_library = (stdcxxlib_kind) decoded_options[i].value;
  break;
@@ -877,8 +891,16 @@ lang_specific_driver (struct cl_decoded_option 
**in_decoded_options,
 
   if (linking)
 {
+#ifdef HAVE_LD_STATIC_DYNAMIC
+  if (allow_libraries && !shared_libgm2)
+   append_option (OPT_Wl_, LD_STATIC_OPTION, 1);
+#endif
   if (allow_libraries)
add_default_archives (libpath, libraries);
+#ifdef HAVE_LD_STATIC_DYNAMIC
+  if (allow_libraries && !shared_libgm2)
+   append_option (OPT_Wl_, LD_DYNAMIC_OPTION, 1);
+#endif
   /* Add `-lstdc++' if we haven't already done so.  */
 #ifdef HAVE_LD_STATIC_DYNAMIC
   if (library > 1 && !static_link)
-- 
2.37.1 (Apple Git-137.1)



Re: [PATCH] [OpenMP] GC unused SIMD clones

2023-01-02 Thread Tobias Burnus

On 25.11.22 03:13, Sandra Loosemore wrote:

This patch is a followup to my not-yet-reviewed patch
[PATCH v4] OpenMP: Generate SIMD clones for functions with "declare
target"


That patch got reviewed and went into mainline on Nov 15, 2022 as
https://gcc.gnu.org/r13-4309-g309e2d95e3b930c6f15c8a5346b913158404c76d



In comments on a previous iteration of that patch, I was asked to do
something to delete unused SIMD clones to avoid code bloat; this is it.

I've implemented something like a simple mark-and-sweep algorithm.
Clones that are used are marked at the point where the call is
generated in the vectorizer.  The loop that iterates over functions to
apply the passes after IPA is modified to defer processing of unmarked
clones, and anything left over is deleted.



Jakub referred to Honza for the review, who wrote yesterday off list (to
me and Sandra):


I am really sorry for taking such a long time.  It was a busy month for me
and I was not very keen about the idea, since we had such logic
implemented many years ago but removed it to be able to determine
functions to be output early and optimize code layout.

I see that this is not possible with current organization where
vectorization is run late, so I guess it does make sense to do what you
are doing.

Patch is OK,
Honza


Thanks for the review! (And to Sandra: thanks for the patch.)

I leave it to Sandra to commit her patch and only want to update the
gcc-patches@ email. However, I think we can expect a commit tomorrow.
(Today is a holiday at her place - as new year's day fell on a Sunday.)

Thanks and happy new year!

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] Ada, Darwin: Do not link libgcc statically on Darwin [PR108202].

2023-01-02 Thread Arnaud Charlet via Gcc-patches
> I would like to revise this patch to be more conservative (only applying to 
> Darwin 8 and 9).

This is OK, thanks (and Happy New Year!)

> > On 24 Dec 2022, at 19:00, Iain Sandoe via Gcc-patches 
> >  wrote:
> > 
> > Tested on i686, x86-64 darwin, x86_64-linux (with a 32b multilib).
> > OK for trunk?
> > Iain
> 
> revised:
> 
> [PATCH] Ada,Darwin: Do not link libgcc statically on Darwin 8 and 9  
> [PR108202].
> 
> Normally, GCC executables are built with -static-libstdc++ -static-libgcc
> on Darwin.  This is fine in most cases, because GCC executables typically
> do not use exceptions.   However gnat1 does use exceptions and also pulls
> in system libraries that are linked against the installed shared libgcc
> which contains the system unwinder.  This means that gnat1 effectively has
> two unwinder instances (which does not work reliably since the unwinders
> have global state).
> 
> A recent change in the initialization of FDEs has made this a hard error
> now on Darwin versions (8 and 9) with libgcc installed in /usr/lib (gnat1
> now hangs when an exception is thrown).
> 
> The solution is to link libgcc dynamically, picking up the installed
> system version.  To do this we strip -static-libgcc from the link flags.
> 
>   PR ada/108202
> 
> gcc/ada/ChangeLog:
> 
>   * gcc-interface/Make-lang.in (GCC_LINKERFLAGS, GCC_LDFLAGS):
>   Versions of ALL_LINKERFLAGS, LDFLAGS with -Werror and
>   -static-libgcc filtered out for Darwin8 and 9 (-Werror is filtered
>   out for other hosts).