Re: [PATCH V2] Emit funcall external declarations only if actually used.

2023-09-05 Thread Jose E. Marchesi via Gcc-patches


ping^

> ping
>
>> [Differences from V1:
>> - Prototype for call_from_call_insn moved before comment block.
>> - Reuse the `call' flag for SYMBOL_REF_LIBCALL.
>> - Fallback to check REG_CALL_DECL in non-direct calls.
>> - New test to check correct behavior for non-direct calls.]
>>
>> There are many places in GCC where alternative local sequences are
>> tried in order to determine what is the cheapest or best alternative
>> to use in the current target.  When any of these sequences involve a
>> libcall, the current implementation of emit_library_call_value_1
>> introduces a side effect consisting of emitting an external declaration
>> for the funcall (such as __divdi3) which is thus emitted even if the
>> sequence that does the libcall is not retained.
>>
>> This is problematic in targets such as BPF, because the kernel loader
>> chokes on the spurious symbol __divdi3 and makes the resulting BPF
>> object unloadable.  Note that BPF objects are not linked before being
>> loaded.
>>
>> This patch changes emit_library_call_value_1 to mark the target
>> SYMBOL_REF as a libcall.  Then, the emission of the external
>> declaration is done in the first loop of final.cc:shorten_branches.
>> This happens only if the corresponding sequence has been kept.
>>
>> Regtested in x86_64-linux-gnu.
>> Tested with host x86_64-linux-gnu with target bpf-unknown-none.
>>
>> gcc/ChangeLog
>>
>>  * rtl.h (SYMBOL_REF_LIBCALL): Define.
>>  * calls.cc (emit_library_call_value_1): Do not emit external
>>  libcall declaration here.
>>  * final.cc (shorten_branches): Do it here.
>>
>> gcc/testsuite/ChangeLog
>>
>>  * gcc.target/bpf/divmod-libcall-1.c: New test.
>>  * gcc.target/bpf/divmod-libcall-2.c: Likewise.
>>  * gcc.c-torture/compile/libcall-2.c: Likewise.
>> ---
>>  gcc/calls.cc  |  9 +++---
>>  gcc/final.cc  | 30 +++
>>  gcc/rtl.h |  5 
>>  .../gcc.c-torture/compile/libcall-2.c |  8 +
>>  .../gcc.target/bpf/divmod-libcall-1.c | 19 
>>  .../gcc.target/bpf/divmod-libcall-2.c | 16 ++
>>  6 files changed, 83 insertions(+), 4 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.c-torture/compile/libcall-2.c
>>  create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c
>>
>> diff --git a/gcc/calls.cc b/gcc/calls.cc
>> index 1f3a6d5c450..219ea599b16 100644
>> --- a/gcc/calls.cc
>> +++ b/gcc/calls.cc
>> @@ -4388,9 +4388,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, 
>> rtx value,
>>  || argvec[i].partial != 0)
>>update_stack_alignment_for_call ([i].locate);
>>  
>> -  /* If this machine requires an external definition for library
>> - functions, write one out.  */
>> -  assemble_external_libcall (fun);
>> +  /* Mark the emitted target as a libcall.  This will be used by final
>> + in order to emit an external symbol declaration if the libcall is
>> + ever used.  */
>> +  SYMBOL_REF_LIBCALL (fun) = 1;
>>  
>>original_args_size = args_size;
>>args_size.constant = (aligned_upper_bound (args_size.constant
>> @@ -4735,7 +4736,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
>> value,
>> valreg,
>> old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
>>  
>> -  if (flag_ipa_ra)
>> +  if (flag_ipa_ra || SYMBOL_REF_LIBCALL (orgfun))
>>  {
>>rtx datum = orgfun;
>>gcc_assert (GET_CODE (datum) == SYMBOL_REF);
>> diff --git a/gcc/final.cc b/gcc/final.cc
>> index dd3e22547ac..2041e43fdd1 100644
>> --- a/gcc/final.cc
>> +++ b/gcc/final.cc
>> @@ -804,6 +804,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
>>  }
>>  
>>  
>> +static rtx call_from_call_insn (rtx_call_insn *insn);
>> +
>>  /* Make a pass over all insns and compute their actual lengths by shortening
>> any branches of variable length if possible.  */
>>  
>> @@ -850,6 +852,34 @@ shorten_branches (rtx_insn *first)
>>for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
>>  {
>>INSN_SHUID (insn) = i++;
>> +
>> +  /* If this is a `call' instruction implementing a libcall, and
>> + this machine requires an external definition for library
>> + functions, write one out.  */
>> +  if (CALL_P (insn))
>> +{
>> +  rtx x;
>> +
>> +  if ((x = call_from_call_insn (dyn_cast  (insn)))
>> +  && (x = XEXP (x, 0))
>> +  && MEM_P (x)
>> +  && (x = XEXP (x, 0))
>> +  && SYMBOL_REF_P (x)
>> +  && SYMBOL_REF_LIBCALL (x))
>> +{
>> +  /* Direct call.  */
>> +  assemble_external_libcall (x);
>> +}
>> +  else if ((x = find_reg_note (insn, REG_CALL_DECL, NULL_RTX))
>> +   && (x = XEXP (x, 0)))
>> +{
>> +  

Re: [PATCH V2] Emit funcall external declarations only if actually used.

2023-08-30 Thread Jose E. Marchesi via Gcc-patches


ping

> [Differences from V1:
> - Prototype for call_from_call_insn moved before comment block.
> - Reuse the `call' flag for SYMBOL_REF_LIBCALL.
> - Fallback to check REG_CALL_DECL in non-direct calls.
> - New test to check correct behavior for non-direct calls.]
>
> There are many places in GCC where alternative local sequences are
> tried in order to determine what is the cheapest or best alternative
> to use in the current target.  When any of these sequences involve a
> libcall, the current implementation of emit_library_call_value_1
> introduces a side effect consisting of emitting an external declaration
> for the funcall (such as __divdi3) which is thus emitted even if the
> sequence that does the libcall is not retained.
>
> This is problematic in targets such as BPF, because the kernel loader
> chokes on the spurious symbol __divdi3 and makes the resulting BPF
> object unloadable.  Note that BPF objects are not linked before being
> loaded.
>
> This patch changes emit_library_call_value_1 to mark the target
> SYMBOL_REF as a libcall.  Then, the emission of the external
> declaration is done in the first loop of final.cc:shorten_branches.
> This happens only if the corresponding sequence has been kept.
>
> Regtested in x86_64-linux-gnu.
> Tested with host x86_64-linux-gnu with target bpf-unknown-none.
>
> gcc/ChangeLog
>
>   * rtl.h (SYMBOL_REF_LIBCALL): Define.
>   * calls.cc (emit_library_call_value_1): Do not emit external
>   libcall declaration here.
>   * final.cc (shorten_branches): Do it here.
>
> gcc/testsuite/ChangeLog
>
>   * gcc.target/bpf/divmod-libcall-1.c: New test.
>   * gcc.target/bpf/divmod-libcall-2.c: Likewise.
>   * gcc.c-torture/compile/libcall-2.c: Likewise.
> ---
>  gcc/calls.cc  |  9 +++---
>  gcc/final.cc  | 30 +++
>  gcc/rtl.h |  5 
>  .../gcc.c-torture/compile/libcall-2.c |  8 +
>  .../gcc.target/bpf/divmod-libcall-1.c | 19 
>  .../gcc.target/bpf/divmod-libcall-2.c | 16 ++
>  6 files changed, 83 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.c-torture/compile/libcall-2.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c
>
> diff --git a/gcc/calls.cc b/gcc/calls.cc
> index 1f3a6d5c450..219ea599b16 100644
> --- a/gcc/calls.cc
> +++ b/gcc/calls.cc
> @@ -4388,9 +4388,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
> value,
>   || argvec[i].partial != 0)
>update_stack_alignment_for_call ([i].locate);
>  
> -  /* If this machine requires an external definition for library
> - functions, write one out.  */
> -  assemble_external_libcall (fun);
> +  /* Mark the emitted target as a libcall.  This will be used by final
> + in order to emit an external symbol declaration if the libcall is
> + ever used.  */
> +  SYMBOL_REF_LIBCALL (fun) = 1;
>  
>original_args_size = args_size;
>args_size.constant = (aligned_upper_bound (args_size.constant
> @@ -4735,7 +4736,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
> value,
>  valreg,
>  old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
>  
> -  if (flag_ipa_ra)
> +  if (flag_ipa_ra || SYMBOL_REF_LIBCALL (orgfun))
>  {
>rtx datum = orgfun;
>gcc_assert (GET_CODE (datum) == SYMBOL_REF);
> diff --git a/gcc/final.cc b/gcc/final.cc
> index dd3e22547ac..2041e43fdd1 100644
> --- a/gcc/final.cc
> +++ b/gcc/final.cc
> @@ -804,6 +804,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
>  }
>  
>  
> +static rtx call_from_call_insn (rtx_call_insn *insn);
> +
>  /* Make a pass over all insns and compute their actual lengths by shortening
> any branches of variable length if possible.  */
>  
> @@ -850,6 +852,34 @@ shorten_branches (rtx_insn *first)
>for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
>  {
>INSN_SHUID (insn) = i++;
> +
> +  /* If this is a `call' instruction implementing a libcall, and
> + this machine requires an external definition for library
> + functions, write one out.  */
> +  if (CALL_P (insn))
> +{
> +  rtx x;
> +
> +  if ((x = call_from_call_insn (dyn_cast  (insn)))
> +  && (x = XEXP (x, 0))
> +  && MEM_P (x)
> +  && (x = XEXP (x, 0))
> +  && SYMBOL_REF_P (x)
> +  && SYMBOL_REF_LIBCALL (x))
> +{
> +  /* Direct call.  */
> +  assemble_external_libcall (x);
> +}
> +  else if ((x = find_reg_note (insn, REG_CALL_DECL, NULL_RTX))
> +   && (x = XEXP (x, 0)))
> +{
> +  /* Indirect call with REG_CALL_DECL note.  */
> +  gcc_assert (SYMBOL_REF_P (x));
> +  if 

Re: [PATCH V4] Add warning options -W[no-]compare-distinct-pointer-types

2023-08-24 Thread Jose E. Marchesi via Gcc-patches


Hi Marek.

> On Thu, Aug 17, 2023 at 05:37:03PM +0200, Jose E. Marchesi via Gcc-patches 
> wrote:
>> 
>> > On Thu, 17 Aug 2023, Jose E. Marchesi via Gcc-patches wrote:
>> >
>> >> +@opindex Wcompare-distinct-pointer-types
>> >> +@item -Wcompare-distinct-pointer-types
>> >
>> > This @item should say @r{(C and Objective-C only)}, since the option isn't 
>> > implemented for C++.  OK with that change.
>> 
>> Pushed with that change.
>> Thanks for the prompt review!
>
> I see the following failures:
>
> FAIL: gcc.c-torture/compile/pr106537-1.c   -Os   (test for warnings, line 28)
> FAIL: gcc.c-torture/compile/pr106537-1.c   -Os   (test for warnings, line 30)
> FAIL: gcc.c-torture/compile/pr106537-1.c -O2 -flto
> -fno-use-linker-plugin -flto-partition=none (test for warnings, line
> 28)
> FAIL: gcc.c-torture/compile/pr106537-1.c -O2 -flto
> -fno-use-linker-plugin -flto-partition=none (test for warnings, line
> 30)
> FAIL: gcc.c-torture/compile/pr106537-1.c -O2 -flto -fuse-linker-plugin
> -fno-fat-lto-objects (test for warnings, line 28)
> FAIL: gcc.c-torture/compile/pr106537-1.c -O2 -flto -fuse-linker-plugin
> -fno-fat-lto-objects (test for warnings, line 30)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O0   (test for warnings, line 26)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O0   (test for warnings, line 28)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O1   (test for warnings, line 26)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O1   (test for warnings, line 28)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O2   (test for warnings, line 26)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O2   (test for warnings, line 28)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O3 -g   (test for warnings, line 
> 26)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -O3 -g   (test for warnings, line 
> 28)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -Os   (test for warnings, line 26)
> FAIL: gcc.c-torture/compile/pr106537-2.c   -Os   (test for warnings, line 28)
> FAIL: gcc.c-torture/compile/pr106537-2.c -O2 -flto
> -fno-use-linker-plugin -flto-partition=none (test for warnings, line
> 26)
> FAIL: gcc.c-torture/compile/pr106537-2.c -O2 -flto
> -fno-use-linker-plugin -flto-partition=none (test for warnings, line
> 28)
> FAIL: gcc.c-torture/compile/pr106537-2.c -O2 -flto -fuse-linker-plugin
> -fno-fat-lto-objects (test for warnings, line 26)
> FAIL: gcc.c-torture/compile/pr106537-2.c -O2 -flto -fuse-linker-plugin
> -fno-fat-lto-objects (test for warnings, line 28)
>
> The problem is that for ==/!=, when one of the types is void*,
> build_binary_op goes to the branch attempting to warn about
> comparing void* with a function pointer, and never gets to the 
> -Wcompare-distinct-pointer-types warning.

Oof I wonder what happened with my regtesting.

I just pushed the patch below as obvious, which adjusts the tests to
conform to GCC's behavior of not emitting that pedwarn for
equality/inequality of void pointers with non-function pointers.

Sorry about this.  And thanks for reporting.

>From 721f7e2c4e5eed645593258624dd91e6c39f3bd2 Mon Sep 17 00:00:00 2001
From: "Jose E. Marchesi" 
Date: Thu, 24 Aug 2023 17:10:52 +0200
Subject: [PATCH] Fix tests for PR 106537.

This patch fixes the tests for PR 106537 (support for
-W[no]-compare-distinct-pointer-types) which were expecting the
warning when checking for equality/inequality of void pointers with
non-function pointers.

gcc/testsuite/ChangeLog:

PR c/106537
* gcc.c-torture/compile/pr106537-1.c: Comparing void pointers to
non-function pointers is legit.
* gcc.c-torture/compile/pr106537-2.c: Likewise.
---
 gcc/testsuite/gcc.c-torture/compile/pr106537-1.c | 6 --
 gcc/testsuite/gcc.c-torture/compile/pr106537-2.c | 6 --
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr106537-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr106537-1.c
index 3f3b06577d5..b67b6090dc3 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr106537-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr106537-1.c
@@ -25,9 +25,11 @@ int xdp_context (struct xdp_md *xdp)
 return 3;
   if (metadata + 1 <= data) /* { dg-warning "comparison of distinct pointer 
types" } */
 return 4;
-  if (metadata + 1 == data) /* { dg-warning "comparison of distinct pointer 
types" } */
+  /* Note that it is ok to check for equality or inequality betewen void
+ pointers and any other non-function pointers.  */
+  if ((int*) (metadata + 1) == (long*) data) /* { dg-warning "comparison of 
distinct pointer types" } */
 return 5;
-  if (metadata + 1 != data) /* { dg-warning "comparison of distinct pointer 
types" } */
+  if ((int*) metadata + 1 != (long*) d

Re: [PATCH] bpf: neg instruction does not accept an immediate

2023-08-21 Thread Jose E. Marchesi via Gcc-patches


> The BPF virtual machine does not support neg nor neg32 instructions with
> an immediate.
>
> The erroneous instructions were removed from binutils:
> https://sourceware.org/pipermail/binutils/2023-August/129135.html
>
> Change the define_insn so that an immediate cannot be accepted.
>
> From testing, a neg-immediate was probably never chosen over a
> mov-immediate anyway.

OK.
Thanks!

>
> Tested on x86_64-linux-gnu host for bpf-unknown-none target.
>
> gcc/
>
>   * config/bpf/bpf.md (neg): Second operand must be a register.
> ---
>  gcc/config/bpf/bpf.md | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index a64de1095ed..e87d72182bb 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -163,8 +163,8 @@ (define_insn "sub3"
>  
>  ;;; Negation
>  (define_insn "neg2"
> -  [(set (match_operand:AM 0 "register_operand"   "=r,r")
> -(neg:AM (match_operand:AM 1 "reg_or_imm_operand" " 0,I")))]
> +  [(set (match_operand:AM 0 "register_operand" "=r")
> +(neg:AM (match_operand:AM 1 "register_operand" " 0")))]
>""
>"{neg\t%0|%w0 = -%w1}"
>[(set_attr "type" "")])


[PATCH V2] Emit funcall external declarations only if actually used.

2023-08-21 Thread Jose E. Marchesi via Gcc-patches
[Differences from V1:
- Prototype for call_from_call_insn moved before comment block.
- Reuse the `call' flag for SYMBOL_REF_LIBCALL.
- Fallback to check REG_CALL_DECL in non-direct calls.
- New test to check correct behavior for non-direct calls.]

There are many places in GCC where alternative local sequences are
tried in order to determine what is the cheapest or best alternative
to use in the current target.  When any of these sequences involve a
libcall, the current implementation of emit_library_call_value_1
introduces a side effect consisting of emitting an external declaration
for the funcall (such as __divdi3) which is thus emitted even if the
sequence that does the libcall is not retained.

This is problematic in targets such as BPF, because the kernel loader
chokes on the spurious symbol __divdi3 and makes the resulting BPF
object unloadable.  Note that BPF objects are not linked before being
loaded.

This patch changes emit_library_call_value_1 to mark the target
SYMBOL_REF as a libcall.  Then, the emission of the external
declaration is done in the first loop of final.cc:shorten_branches.
This happens only if the corresponding sequence has been kept.

Regtested in x86_64-linux-gnu.
Tested with host x86_64-linux-gnu with target bpf-unknown-none.

gcc/ChangeLog

* rtl.h (SYMBOL_REF_LIBCALL): Define.
* calls.cc (emit_library_call_value_1): Do not emit external
libcall declaration here.
* final.cc (shorten_branches): Do it here.

gcc/testsuite/ChangeLog

* gcc.target/bpf/divmod-libcall-1.c: New test.
* gcc.target/bpf/divmod-libcall-2.c: Likewise.
* gcc.c-torture/compile/libcall-2.c: Likewise.
---
 gcc/calls.cc  |  9 +++---
 gcc/final.cc  | 30 +++
 gcc/rtl.h |  5 
 .../gcc.c-torture/compile/libcall-2.c |  8 +
 .../gcc.target/bpf/divmod-libcall-1.c | 19 
 .../gcc.target/bpf/divmod-libcall-2.c | 16 ++
 6 files changed, 83 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/libcall-2.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 1f3a6d5c450..219ea599b16 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -4388,9 +4388,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
|| argvec[i].partial != 0)
   update_stack_alignment_for_call (&argvec[i].locate);
 
-  /* If this machine requires an external definition for library
- functions, write one out.  */
-  assemble_external_libcall (fun);
+  /* Mark the emitted target as a libcall.  This will be used by final
+ in order to emit an external symbol declaration if the libcall is
+ ever used.  */
+  SYMBOL_REF_LIBCALL (fun) = 1;
 
   original_args_size = args_size;
   args_size.constant = (aligned_upper_bound (args_size.constant
@@ -4735,7 +4736,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
   valreg,
   old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
 
-  if (flag_ipa_ra)
+  if (flag_ipa_ra || SYMBOL_REF_LIBCALL (orgfun))
 {
   rtx datum = orgfun;
   gcc_assert (GET_CODE (datum) == SYMBOL_REF);
diff --git a/gcc/final.cc b/gcc/final.cc
index dd3e22547ac..2041e43fdd1 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -804,6 +804,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
 }
 
 
+static rtx call_from_call_insn (rtx_call_insn *insn);
+
 /* Make a pass over all insns and compute their actual lengths by shortening
any branches of variable length if possible.  */
 
@@ -850,6 +852,34 @@ shorten_branches (rtx_insn *first)
   for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
 {
   INSN_SHUID (insn) = i++;
+
+  /* If this is a `call' instruction implementing a libcall, and
+ this machine requires an external definition for library
+ functions, write one out.  */
+  if (CALL_P (insn))
+{
+  rtx x;
+
+  if ((x = call_from_call_insn (dyn_cast <rtx_call_insn *> (insn)))
+  && (x = XEXP (x, 0))
+  && MEM_P (x)
+  && (x = XEXP (x, 0))
+  && SYMBOL_REF_P (x)
+  && SYMBOL_REF_LIBCALL (x))
+{
+  /* Direct call.  */
+  assemble_external_libcall (x);
+}
+  else if ((x = find_reg_note (insn, REG_CALL_DECL, NULL_RTX))
+   && (x = XEXP (x, 0)))
+{
+  /* Indirect call with REG_CALL_DECL note.  */
+  gcc_assert (SYMBOL_REF_P (x));
+  if (SYMBOL_REF_LIBCALL (x))
+assemble_external_libcall (x);
+}
+}
+
   if (INSN_P (insn))
continue;
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index e1c51156f90..28be708a55f 100644

Re: [PATCH] Emit funcall external declarations only if actually used.

2023-08-18 Thread Jose E. Marchesi via Gcc-patches


Hi Jakub.
Thanks for the review.

> On Fri, Aug 18, 2023 at 03:53:51PM +0200, Jose E. Marchesi via Gcc-patches 
> wrote:
>> --- a/gcc/final.cc
>> +++ b/gcc/final.cc
>> @@ -815,6 +815,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
>> reorg.cc, since the branch splitting exposes new instructions with delay
>> slots.  */
>>  
>> +static rtx call_from_call_insn (rtx_call_insn *insn);
>> +
>
> I'd say the forward declaration should go before the function comment, so
> that it is clear the function comment talks about shorten_branches.

Will do.

>
>>  void
>>  shorten_branches (rtx_insn *first)
>>  {
>> @@ -850,6 +852,20 @@ shorten_branches (rtx_insn *first)
>>for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
>>  {
>>INSN_SHUID (insn) = i++;
>> +
>> +  /* If this is a `call' instruction implementing a libcall,
>> + and this machine requires an external definition for library
>> + functions, write one out.  */
>> +  if (CALL_P (insn))
>> +{
>> +  rtx x = call_from_call_insn (dyn_cast  (insn));
>> +  x = XEXP (x, 0);
>> +  if (x && MEM_P (x)
>
> When all conditions don't fit on one line, each && condition should be on
> its own line.

Will fix.

>
>> +  && SYMBOL_REF_P (XEXP (x, 0))
>> +  && SYMBOL_REF_LIBCALL (XEXP (x, 0)))
>> +assemble_external_libcall (XEXP (x, 0));
>> +}
>
> This won't work if target can't use a direct call instruction.
> Consider
> __int128 a, b; void foo () { a = a / b; }
> on x86_64-linux.  With just -O2, the above works fine, with
> -O2 -mcmodel=large it will not, the call is indirect, but at least one has
> REG_CALL_DECL note that could be used as fallback to the above.
> And with -O0 -mcmodel=large because flag_ipa_ra is false REG_CALL_DECL isn't
> emitted at all.
> So, perhaps you could emit the REG_CALL_DECL note even if !flag_ipa_ra
> when SYMBOL_REF_LIBCALL is set?

Hmm something like this?

(I am aware that as things stand in emit_library_call_value_1 that
 conditional will be always true, but I think it is good to keep the
 conditional as documentation and in case emit_library_call_value_1
 changes in the future.  Note also that `fun' is known to be `orgfun'
 when the bit is set.  That may change later as per
 prepare_call_address.)

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 1f3a6d5c450..219ea599b16 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -4388,9 +4388,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
|| argvec[i].partial != 0)
   update_stack_alignment_for_call (&argvec[i].locate);
 
-  /* If this machine requires an external definition for library
- functions, write one out.  */
-  assemble_external_libcall (fun);
+  /* Mark the emitted target as a libcall.  This will be used by final
+ in order to emit an external symbol declaration if the libcall is
+ ever used.  */
+  SYMBOL_REF_LIBCALL (fun) = 1;
 
   original_args_size = args_size;
   args_size.constant = (aligned_upper_bound (args_size.constant
@@ -4735,7 +4736,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
   valreg,
   old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
 
-  if (flag_ipa_ra)
+  if (flag_ipa_ra || SYMBOL_REF_LIBCALL (orgfun))
 {
   rtx datum = orgfun;
   gcc_assert (GET_CODE (datum) == SYMBOL_REF);
diff --git a/gcc/final.cc b/gcc/final.cc
index dd3e22547ac..53f5d890809 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -804,6 +804,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
 }
 
 
+static rtx call_from_call_insn (rtx_call_insn *insn);
+
 /* Make a pass over all insns and compute their actual lengths by shortening
any branches of variable length if possible.  */
 
@@ -850,6 +852,19 @@ shorten_branches (rtx_insn *first)
   for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
 {
   INSN_SHUID (insn) = i++;
+
+  /* If this is a `call' instruction or implementing a libcall,
+ and this machine requires an external definition for library
+ functions, write one out.  */
+  if (CALL_P (insn))
+{
+  rtx x = call_from_call_insn (dyn_cast <rtx_call_insn *> (insn));
+
+  if ((x = XEXP (x, 0)) && MEM_P (x) && SYMBOL_REF_P (XEXP (x, 0))
+  || (x = find_reg_note (insn, REG_CALL_DECL, NULL_RTX)))
+assemble_external_libcall (XEXP (x, 0));
+}
+
   if (INSN_P (insn))
continue;
 
>> diff --git a/gcc/rtl.h b/gcc/rtl.h
>> index e1c51156f90..945e3267a34 100644
>> --- a/gcc/rtl.h
>> +++ b/gcc/rtl.h
>> @@ -402,6 +402,8 @@ struct GTY((desc("0"), tag(&q

[PATCH] Emit funcall external declarations only if actually used.

2023-08-18 Thread Jose E. Marchesi via Gcc-patches
[Previous thread:
 https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608162.html]

There are many places in GCC where alternative local sequences are
tried in order to determine what is the cheapest or best alternative
to use in the current target.  When any of these sequences involve a
libcall, the current implementation of emit_library_call_value_1
introduces a side effect consisting of emitting an external declaration
for the funcall (such as __divdi3) which is thus emitted even if the
sequence that does the libcall is not retained.

This is problematic in targets such as BPF, because the kernel loader
chokes on the spurious symbol __divdi3 and makes the resulting BPF
object unloadable.  Note that BPF objects are not linked before being
loaded.

This patch changes emit_library_call_value_1 to mark the target
SYMBOL_REF as a libcall.  Then, the emission of the external
declaration is done in the first loop of final.cc:shorten_branches.
This happens only if the corresponding sequence has been kept.

Regtested in x86_64-linux-gnu.
Tested with host x86_64-linux-gnu with target bpf-unknown-none.

gcc/ChangeLog

* rtl.h: New flag is_libcall.
(SYMBOL_REF_LIBCALL): Define.
* calls.cc (emit_library_call_value_1): Do not emit external
libcall declaration here.
* final.cc (shorten_branches): Do it here.

gcc/testsuite/ChangeLog

* gcc.target/bpf/divmod-libcall-1.c: New test.
* gcc.target/bpf/divmod-libcall-2.c: Likewise.
---
 gcc/calls.cc  |  7 ---
 gcc/final.cc  | 16 
 gcc/rtl.h |  6 ++
 .../gcc.target/bpf/divmod-libcall-1.c | 19 +++
 .../gcc.target/bpf/divmod-libcall-2.c | 16 
 5 files changed, 61 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-1.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-libcall-2.c

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 1f3a6d5c450..e0ddda42442 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -4388,9 +4388,10 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
|| argvec[i].partial != 0)
   update_stack_alignment_for_call (&argvec[i].locate);
 
-  /* If this machine requires an external definition for library
- functions, write one out.  */
-  assemble_external_libcall (fun);
+  /* Mark the emitted target as a libcall.  This will be used by final
+ in order to emit an external symbol declaration if the libcall is
+ ever used.  */
+  SYMBOL_REF_LIBCALL (fun) = 1;
 
   original_args_size = args_size;
   args_size.constant = (aligned_upper_bound (args_size.constant
diff --git a/gcc/final.cc b/gcc/final.cc
index dd3e22547ac..80c112b91f7 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -815,6 +815,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
reorg.cc, since the branch splitting exposes new instructions with delay
slots.  */
 
+static rtx call_from_call_insn (rtx_call_insn *insn);
+
 void
 shorten_branches (rtx_insn *first)
 {
@@ -850,6 +852,20 @@ shorten_branches (rtx_insn *first)
   for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
 {
   INSN_SHUID (insn) = i++;
+
+  /* If this is a `call' instruction implementing a libcall,
+ and this machine requires an external definition for library
+ functions, write one out.  */
+  if (CALL_P (insn))
+{
+  rtx x = call_from_call_insn (dyn_cast <rtx_call_insn *> (insn));
+  x = XEXP (x, 0);
+  if (x && MEM_P (x)
+  && SYMBOL_REF_P (XEXP (x, 0))
+  && SYMBOL_REF_LIBCALL (XEXP (x, 0)))
+assemble_external_libcall (XEXP (x, 0));
+}
+
   if (INSN_P (insn))
continue;
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index e1c51156f90..945e3267a34 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -402,6 +402,8 @@ struct GTY((desc("0"), tag("0"),
  1 in a VALUE or DEBUG_EXPR is NO_LOC_P in var-tracking.cc.
  Dumped as "/i" in RTL dumps.  */
   unsigned return_val : 1;
+  /* 1 in a SYMBOL_REF if it is the target of a libcall.  */
+  unsigned is_libcall : 1;
 
   union {
 /* The final union field is aligned to 64 bits on LP64 hosts,
@@ -2734,6 +2736,10 @@ do { 
\
 #define SYMBOL_REF_USED(RTX)   \
   (RTL_FLAG_CHECK1 ("SYMBOL_REF_USED", (RTX), SYMBOL_REF)->used)
 
+/* 1 if RTX is a symbol_ref that represents a libcall target.  */
+#define SYMBOL_REF_LIBCALL(RTX) \
+  (RTL_FLAG_CHECK1 ("SYMBOL_REF_LIBCALL", (RTX), SYMBOL_REF)->is_libcall)
+
 /* 1 if RTX is a symbol_ref for a weak symbol.  */
 #define SYMBOL_REF_WEAK(RTX)   \
   (RTL_FLAG_CHECK1 ("SYMBOL_REF_WEAK", (RTX), SYMBOL_REF)->return_val)
diff --git 

[COMMITTED] bpf: bump maximum frame size limit to 32767 bytes

2023-08-18 Thread Jose E. Marchesi via Gcc-patches
This commit bumps the maximum stack frame size allowed for BPF
functions to the maximum possible value.

Tested in x86_64-linux-gnu host and target bpf-unknown-none.

gcc/ChangeLog

* config/bpf/bpf.opt (mframe-limit): Set default to 32767.

gcc/testsuite/ChangeLog

* gcc.target/bpf/frame-limit-1.c: New test.
* gcc.target/bpf/frame-limit-2.c: Likewise.
---
 gcc/config/bpf/bpf.opt   |  2 +-
 gcc/testsuite/gcc.target/bpf/frame-limit-1.c | 18 ++
 gcc/testsuite/gcc.target/bpf/frame-limit-2.c | 16 
 3 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/frame-limit-1.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/frame-limit-2.c

diff --git a/gcc/config/bpf/bpf.opt b/gcc/config/bpf/bpf.opt
index 8e240d397e4..efa0380ee3f 100644
--- a/gcc/config/bpf/bpf.opt
+++ b/gcc/config/bpf/bpf.opt
@@ -38,7 +38,7 @@ Target RejectNegative InverseMask(BIG_ENDIAN)
 Generate little-endian eBPF.
 
 mframe-limit=
-Target Joined RejectNegative UInteger IntegerRange(0, 32767) 
Var(bpf_frame_limit) Init(512)
+Target Joined RejectNegative UInteger IntegerRange(0, 32767) 
Var(bpf_frame_limit) Init(32767)
 Set a hard limit for the size of each stack frame, in bytes.
 
 mco-re
diff --git a/gcc/testsuite/gcc.target/bpf/frame-limit-1.c 
b/gcc/testsuite/gcc.target/bpf/frame-limit-1.c
new file mode 100644
index 000..7843e04b5ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/frame-limit-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+/* The stack frame size is limited to 32767 bytes.  */
+
+int
+foo ()
+{
+  long data[4095];
+  return 0;
+}
+
+int
+bar ()
+{
+  long data[4096];
+  return 0;
+} /* { dg-error "stack limit" } */
diff --git a/gcc/testsuite/gcc.target/bpf/frame-limit-2.c 
b/gcc/testsuite/gcc.target/bpf/frame-limit-2.c
new file mode 100644
index 000..57f82e00567
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/frame-limit-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mframe-limit=256" } */
+
+int
+foo ()
+{
+  long data[32];
+  return 0;
+}
+
+int
+bar ()
+{
+  long data[33];
+  return 0;
+} /* { dg-error "stack limit" } */
-- 
2.30.2



Re: [PATCH V4] Add warning options -W[no-]compare-distinct-pointer-types

2023-08-17 Thread Jose E. Marchesi via Gcc-patches


> On Thu, 17 Aug 2023, Jose E. Marchesi via Gcc-patches wrote:
>
>> +@opindex Wcompare-distinct-pointer-types
>> +@item -Wcompare-distinct-pointer-types
>
> This @item should say @r{(C and Objective-C only)}, since the option isn't 
> implemented for C++.  OK with that change.

Pushed with that change.
Thanks for the prompt review!


[PATCH V4] Add warning options -W[no-]compare-distinct-pointer-types

2023-08-17 Thread Jose E. Marchesi via Gcc-patches
[Changes from V3:
- Previous thread:
  https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600625.html
- The tests have been augmented to check all six relational
  operators.  In particular it covers both code paths impacted
  by the patch: the equality/inequality and the relational ops.]

GCC emits pedwarns unconditionally when comparing pointers of
different types, for example:

  int xdp_context (struct xdp_md *xdp)
{
void *data = (void *)(long)xdp->data;
__u32 *metadata = (void *)(long)xdp->data_meta;
__u32 ret;

if (metadata + 1 > data)
  return 0;
return 1;
   }

  /home/jemarch/foo.c: In function ‘xdp_context’:
  /home/jemarch/foo.c:15:20: warning: comparison of distinct pointer types 
lacks a cast
 15 |   if (metadata + 1 > data)
 |^

LLVM supports an option -W[no-]compare-distinct-pointer-types that can
be used in order to enable or disable the emission of such warnings.
It is enabled by default.

This patch adds the same options to GCC.

Documentation and testsuite updates included.
Regtested in x86_64-linux-gnu.
No regressions observed.

gcc/ChangeLog:

PR c/106537
* doc/invoke.texi (Option Summary): Mention
-Wcompare-distinct-pointer-types under `Warning Options'.
(Warning Options): Document -Wcompare-distinct-pointer-types.

gcc/c-family/ChangeLog:

PR c/106537
* c.opt (Wcompare-distinct-pointer-types): New option.

gcc/c/ChangeLog:

PR c/106537
* c-typeck.cc (build_binary_op): Warning on comparing distinct
pointer types only when -Wcompare-distinct-pointer-types.

gcc/testsuite/ChangeLog:

PR c/106537
* gcc.c-torture/compile/pr106537-1.c: New test.
* gcc.c-torture/compile/pr106537-2.c: Likewise.
* gcc.c-torture/compile/pr106537-3.c: Likewise.
---
 gcc/c-family/c.opt|  4 +++
 gcc/c/c-typeck.cc |  6 ++--
 gcc/doc/invoke.texi   |  6 
 .../gcc.c-torture/compile/pr106537-1.c| 34 +++
 .../gcc.c-torture/compile/pr106537-2.c| 32 +
 .../gcc.c-torture/compile/pr106537-3.c| 32 +
 6 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-3.c

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index c7b567ba7ab..2242524cd3e 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1935,6 +1935,10 @@ Winvalid-imported-macros
 C++ ObjC++ Var(warn_imported_macros) Warning
 Warn about macros that have conflicting header units definitions.
 
+Wcompare-distinct-pointer-types
+C ObjC Var(warn_compare_distinct_pointer_types) Warning Init(1)
+Warn if pointers of distinct types are compared without a cast.
+
 flang-info-include-translate
 C++ Var(note_include_translate_yes)
 Note #include directives translated to import declarations.
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 6f2fff51683..e6ddf37d412 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -12772,7 +12772,7 @@ build_binary_op (location_t location, enum tree_code 
code,
  else
/* Avoid warning about the volatile ObjC EH puts on decls.  */
if (!objc_ok)
- pedwarn (location, 0,
+ pedwarn (location, OPT_Wcompare_distinct_pointer_types,
   "comparison of distinct pointer types lacks a cast");
 
  if (result_type == NULL_TREE)
@@ -12912,8 +12912,8 @@ build_binary_op (location_t location, enum tree_code 
code,
  int qual = ENCODE_QUAL_ADDR_SPACE (as_common);
  result_type = build_pointer_type
  (build_qualified_type (void_type_node, qual));
- pedwarn (location, 0,
-  "comparison of distinct pointer types lacks a cast");
+  pedwarn (location, OPT_Wcompare_distinct_pointer_types,
+   "comparison of distinct pointer types lacks a cast");
}
}
   else if (code0 == POINTER_TYPE && null_pointer_constant_p (orig_op1))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 3380ed8bd6f..28ee6fb62bb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -345,6 +345,7 @@ Objective-C and Objective-C++ Dialects}.
 -Wcast-align  -Wcast-align=strict  -Wcast-function-type  -Wcast-qual
 -Wchar-subscripts
 -Wclobbered  -Wcomment
+-Wcompare-distinct-pointer-types
 -Wno-complain-wrong-lang
 -Wconversion  -Wno-coverage-mismatch  -Wno-cpp
 -Wdangling-else  -Wdangling-pointer  -Wdangling-pointer=@var{n}
@@ -9106,6 +9107,11 @@ The latter front end diagnoses
 @samp{f951: Warning: command-line option '-fno-rtti' is valid for C++/D/ObjC++ 
but not for Fortran},
 which may be disabled 

[COMMITTED] bpf: support `naked' function attributes in BPF targets

2023-08-17 Thread Jose E. Marchesi via Gcc-patches
The kernel selftests and other BPF programs make extensive use of the
`naked' function attribute with bodies written using basic inline
assembly.  This patch adds support for the attribute to
bpf-unknown-none, makes it inhibit warnings due to lack of explicit
`return' statement, and updates documentation and testsuite
accordingly.

Tested in x86_64-linux-gnu host and bpf-unknown-none target.

gcc/ChangeLog

PR target/111046
* config/bpf/bpf.cc (bpf_attribute_table): Add entry for the
`naked' function attribute.
(bpf_warn_func_return): New function.
(TARGET_WARN_FUNC_RETURN): Define.
(bpf_expand_prologue): Add preventive comment.
(bpf_expand_epilogue): Likewise.
* doc/extend.texi (BPF Function Attributes): Document the `naked'
function attribute.

gcc/testsuite/ChangeLog

* gcc.target/bpf/naked-1.c: New test.
---
 gcc/config/bpf/bpf.cc  | 25 +
 gcc/doc/extend.texi| 11 +++
 gcc/testsuite/gcc.target/bpf/naked-1.c | 12 
 3 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/bpf/naked-1.c

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 1d0abd7fbb3..437bd652de3 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -154,6 +154,10 @@ static const struct attribute_spec bpf_attribute_table[] =
  { "preserve_access_index", 0, -1, false, true, false, true,
bpf_handle_preserve_access_index_attribute, NULL },
 
+ /* Support for `naked' function attribute.  */
+ { "naked", 0, 1, false, false, false, false,
+   bpf_handle_fndecl_attribute, NULL },
+
  /* The last attribute spec is set to be NULL.  */
  { NULL,   0,  0, false, false, false, false, NULL, NULL }
 };
@@ -335,6 +339,21 @@ bpf_function_value_regno_p (const unsigned int regno)
 #undef TARGET_FUNCTION_VALUE_REGNO_P
 #define TARGET_FUNCTION_VALUE_REGNO_P bpf_function_value_regno_p
 
+
+/* Determine whether to warn about lack of return statement in a
+   function.  */
+
+static bool
+bpf_warn_func_return (tree decl)
+{
+  /* Naked functions are implemented entirely in assembly, including
+ the return instructions.  */
+  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
+}
+
+#undef TARGET_WARN_FUNC_RETURN
+#define TARGET_WARN_FUNC_RETURN bpf_warn_func_return
+
 /* Compute the size of the function's stack frame, including the local
area and the register-save area.  */
 
@@ -388,6 +407,9 @@ bpf_expand_prologue (void)
  dynamically.  This should have been checked already and an error
  emitted.  */
   gcc_assert (!cfun->calls_alloca);
+
+  /* If we ever need to have a proper prologue here, please mind the
+ `naked' function attribute.  */
 }
 
 /* Expand to the instructions in a function epilogue.  This function
@@ -399,6 +421,9 @@ bpf_expand_epilogue (void)
   /* See note in bpf_expand_prologue for an explanation on why we are
  not restoring callee-saved registers in BPF.  */
 
+  /* If we ever need to do anything else than just generating a return
+ instruction here, please mind the `naked' function attribute.  */
+
   emit_jump_insn (gen_exit ());
 }
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index b363386df6e..f657032cbef 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5172,6 +5172,17 @@ attribute.  Example:
 int bpf_probe_read (void *dst, int size, const void *unsafe_ptr)
   __attribute__ ((kernel_helper (4)));
 @end smallexample
+
+@cindex @code{naked} function attribute, BPF
+@item naked
+This attribute allows the compiler to construct the requisite function
+declaration, while allowing the body of the function to be assembly
+code.  The specified function will not have prologue/epilogue
+sequences generated by the compiler.  Only basic @code{asm} statements
+can safely be included in naked functions (@pxref{Basic Asm}).  While
+using extended @code{asm} or a mixture of basic @code{asm} and C code
+may appear to work, they cannot be depended upon to work reliably and
+are not supported.
 @end table
 
 @node C-SKY Function Attributes
diff --git a/gcc/testsuite/gcc.target/bpf/naked-1.c 
b/gcc/testsuite/gcc.target/bpf/naked-1.c
new file mode 100644
index 000..cbbc4c51697
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/naked-1.c
@@ -0,0 +1,12 @@
+/* Verify that __attribute__((naked)) is accepted and
+   produces a naked function.  Also, the compiler must not
+   warn for the lack of return statement.  */
+/* { dg-do compile } */
+/* { dg-options "-O0 -Wreturn-type" } */
+
+int __attribute__((naked)) foo()
+{
+  __asm__ volatile ("@ naked");
+}
+/* { dg-final { scan-assembler "\t@ naked" } } */
+/* { dg-final { scan-assembler "\texit\n" } } */
-- 
2.30.2



Re: [PATCH] bpf: fix pseudoc w regs for small modes [PR111029]

2023-08-17 Thread Jose E. Marchesi via Gcc-patches


> On Tue, Aug 15, 2023 at 9:03 PM Jose E. Marchesi via Gcc-patches
>  wrote:
>>
>>
>> Hello David.
>> Thanks for the patch.
>>
>> OK.
>
> Picking a random patch/mail for this question - how do we maintain BPF
> support for the most recent GCC release which is GCC 13?  I see the
> current state in GCC 13 isn't fully able to provide upstream kernel BPF
> support but GCC 14 contains some bugfixes and some new features(?).
> Is it worthwhile to backport at least bugfixes while GCC 14 is still in
> development even if those are not regression fixes?  Or is GCC 13 BPF
> too broken to be used anyway?

Our plan is:

1. Get git GCC and git binutils to compile all the kernel BPF selftests.
   This covers both functionality (builtins, attributes, BTF, CO-RE,
   etc) and consolidation of behavior between the GNU and llvm bpf
   ports.  We are working very hard to achieve this point and we are
   very near: functionality wise we are on-par in all components, but
   there are some bugs we are fixing.  We expect to be done in a couple
   of weeks.

2. Once the above is achieved, we plan to start doing the backports to
   released/maintained versions of both binutils and GCC so distros like
   Debian (that already package gcc-bpf) can use the toolchain.

3. Next step is to make sure the compiler generates code that can
   generally satisfy the many restrictions imposed by the kernel
   verifier, at least to a point that is practical.  This is a difficult
   general problem not specific to GCC and is shared by llvm and other
   optimizing compilers, sort of a moving target, and it is not clear at
   all how to achieve this in a general and practical way.  We have some
   ideas and have submitted a proposal to discuss this topic during this
   year's Cauldron: "The challenge of compiling for verified targets".

> Thanks,
> Richard.
>
>> > In the BPF pseudo-c assembly dialect, registers treated as 32-bits
>> > rather than the full 64 in various instructions ought to be printed as
>> > "wN" rather than "rN".  But bpf_print_register () was only doing this
>> > for specifically SImode registers, meaning smaller modes were printed
>> > incorrectly.
>> >
>> > This caused assembler errors like:
>> >
>> >   Error: unrecognized instruction `w2 =(s8)r1'
>> >
>> > for a 32-bit sign-extending register move instruction, where the source
>> > register is used in QImode.
>> >
>> > Fix bpf_print_register () to print the "w" version of register when
>> > specified by the template for any mode 32-bits or smaller.
>> >
>> > Tested on bpf-unknown-none.
>> >
>> >   PR target/111029
>> >
>> > gcc/
>> >   * config/bpf/bpf.cc (bpf_print_register): Print 'w' registers
>> >   for any mode 32-bits or smaller, not just SImode.
>> >
>> > gcc/testsuite/
>> >
>> >   * gcc.target/bpf/smov-2.c: New test.
>> >   * gcc.target/bpf/smov-pseudoc-2.c: New test.
>> > ---
>> >  gcc/config/bpf/bpf.cc |  2 +-
>> >  gcc/testsuite/gcc.target/bpf/smov-2.c | 15 +++
>> >  gcc/testsuite/gcc.target/bpf/smov-pseudoc-2.c | 15 +++
>> >  3 files changed, 31 insertions(+), 1 deletion(-)
>> >  create mode 100644 gcc/testsuite/gcc.target/bpf/smov-2.c
>> >  create mode 100644 gcc/testsuite/gcc.target/bpf/smov-pseudoc-2.c
>> >
>> > diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
>> > index 3516b79bce4..1d0abd7fbb3 100644
>> > --- a/gcc/config/bpf/bpf.cc
>> > +++ b/gcc/config/bpf/bpf.cc
>> > @@ -753,7 +753,7 @@ bpf_print_register (FILE *file, rtx op, int code)
>> >  fprintf (file, "%s", reg_names[REGNO (op)]);
>> >else
>> >  {
>> > -  if (code == 'w' && GET_MODE (op) == SImode)
>> > +  if (code == 'w' && GET_MODE_SIZE (GET_MODE (op)) <= 4)
>> >   {
>> > if (REGNO (op) == BPF_FP)
>> >   fprintf (file, "w10");
>> > diff --git a/gcc/testsuite/gcc.target/bpf/smov-2.c 
>> > b/gcc/testsuite/gcc.target/bpf/smov-2.c
>> > new file mode 100644
>> > index 000..6f3516d2385
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/bpf/smov-2.c
>> > @@ -0,0 +1,15 @@
>> > +/* Check signed 32-bit mov instructions.  */
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-mcpu=v4 -O2" } */
>> > +
>> > +int
>> > +foo (unsigned char a, unsigned short b)
>>

Re: [PATCH] bpf: remove useless define_insn for extendsisi2

2023-08-15 Thread Jose E. Marchesi via Gcc-patches


OK.
Thanks!

> This define_insn is never used, since a sign-extend to the same mode is
> just a move, so delete it.
>
> Tested on x86_64-linux-gnu host for bpf-unknown-none target.
>
> gcc/
>
>   * config/bpf/bpf.md (extendsisi2): Delete useless define_insn.
> ---
>  gcc/config/bpf/bpf.md | 7 ---
>  1 file changed, 7 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index e0a42b9f939..a64de1095ed 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -350,13 +350,6 @@ (define_insn "extendqidi2"
> {ldxsb\t%0,%1|%0 = *(s8 *) (%1)}"
>[(set_attr "type" "alu,ldx")])
>  
> -(define_insn "extendsisi2"
> -  [(set (match_operand:SI 0 "register_operand" "=r")
> -(sign_extend:SI (match_operand:SI 1 "register_operand" "r")))]
> -  "bpf_has_smov"
> -  "{movs32\t%0,%1,32|%w0 = (s32) %w1}"
> -  [(set_attr "type" "alu")])
> -
>  (define_insn "extendhisi2"
>[(set (match_operand:SI 0 "register_operand" "=r")
>  (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))]


Re: [PATCH] bpf: fix pseudoc w regs for small modes [PR111029]

2023-08-15 Thread Jose E. Marchesi via Gcc-patches


Hello David.
Thanks for the patch.

OK.

> In the BPF pseudo-c assembly dialect, registers treated as 32-bits
> rather than the full 64 in various instructions ought to be printed as
> "wN" rather than "rN".  But bpf_print_register () was only doing this
> for specifically SImode registers, meaning smaller modes were printed
> incorrectly.
>
> This caused assembler errors like:
>
>   Error: unrecognized instruction `w2 =(s8)r1'
>
> for a 32-bit sign-extending register move instruction, where the source
> register is used in QImode.
>
> Fix bpf_print_register () to print the "w" version of register when
> specified by the template for any mode 32-bits or smaller.
>
> Tested on bpf-unknown-none.
>
>   PR target/111029
>
> gcc/
>   * config/bpf/bpf.cc (bpf_print_register): Print 'w' registers
>   for any mode 32-bits or smaller, not just SImode.
>
> gcc/testsuite/
>
>   * gcc.target/bpf/smov-2.c: New test.
>   * gcc.target/bpf/smov-pseudoc-2.c: New test.
> ---
>  gcc/config/bpf/bpf.cc |  2 +-
>  gcc/testsuite/gcc.target/bpf/smov-2.c | 15 +++
>  gcc/testsuite/gcc.target/bpf/smov-pseudoc-2.c | 15 +++
>  3 files changed, 31 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/smov-2.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/smov-pseudoc-2.c
>
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index 3516b79bce4..1d0abd7fbb3 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -753,7 +753,7 @@ bpf_print_register (FILE *file, rtx op, int code)
>  fprintf (file, "%s", reg_names[REGNO (op)]);
>else
>  {
> -  if (code == 'w' && GET_MODE (op) == SImode)
> +  if (code == 'w' && GET_MODE_SIZE (GET_MODE (op)) <= 4)
>   {
> if (REGNO (op) == BPF_FP)
>   fprintf (file, "w10");
> diff --git a/gcc/testsuite/gcc.target/bpf/smov-2.c 
> b/gcc/testsuite/gcc.target/bpf/smov-2.c
> new file mode 100644
> index 000..6f3516d2385
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/smov-2.c
> @@ -0,0 +1,15 @@
> +/* Check signed 32-bit mov instructions.  */
> +/* { dg-do compile } */
> +/* { dg-options "-mcpu=v4 -O2" } */
> +
> +int
> +foo (unsigned char a, unsigned short b)
> +{
> +  int x = (char) a;
> +  int y = (short) b;
> +
> +  return x + y;
> +}
> +
> +/* { dg-final { scan-assembler {movs32\t%r.,%r.,8\n} } } */
> +/* { dg-final { scan-assembler {movs32\t%r.,%r.,16\n} } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/smov-pseudoc-2.c 
> b/gcc/testsuite/gcc.target/bpf/smov-pseudoc-2.c
> new file mode 100644
> index 000..6af6cadf8df
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/smov-pseudoc-2.c
> @@ -0,0 +1,15 @@
> +/* Check signed 32-bit mov instructions (pseudo-C asm dialect).  */
> +/* { dg-do compile } */
> +/* { dg-options "-mcpu=v4 -O2 -masm=pseudoc" } */
> +
> +int
> +foo (unsigned char a, unsigned short b)
> +{
> +  int x = (char) a;
> +  int y = (short) b;
> +
> +  return x + y;
> +}
> +
> +/* { dg-final { scan-assembler {w. = \(s8\) w.\n} } } */
> +/* { dg-final { scan-assembler {w. = \(s16\) w.\n} } } */


[COMMITTED] bpf: liberate R9 for general register allocation

2023-08-11 Thread Jose E. Marchesi via Gcc-patches
We were reserving one of the hard registers in BPF in order to
implement dynamic stack allocation: alloca and VLAs. However, there is
kernel code that has inline assembly that requires all the non-fixed
registers to be available for register allocation.

This patch:

1. Liberates r9 that is now available for register allocation.

2. Adds a check to GCC so it errors out if the user tries to do
   dynamic stack allocation.  A couple of tests are added for this.

3. Changes xbpf so it no longer saves and restores callee-saved
   registers.  A couple of tests for this have been removed.

4. Adds bpf-*-* to the list of targets that do not support alloca in
   target-support.exp.

Tested in host x86_64-linux-gnu and target bpf-unknown-none.

gcc/ChangeLog

* config/bpf/bpf.md (allocate_stack): Define.
* config/bpf/bpf.h (FIRST_PSEUDO_REGISTER): Make room for fake
stack pointer register.
(FIXED_REGISTERS): Adjust accordingly.
(CALL_USED_REGISTERS): Likewise.
(REG_CLASS_CONTENTS): Likewise.
(REGISTER_NAMES): Likewise.
* config/bpf/bpf.cc (bpf_compute_frame_layout): Do not reserve
space for callee-saved registers.
(bpf_expand_prologue): Do not save callee-saved registers in xbpf.
(bpf_expand_epilogue): Do not restore callee-saved registers in
xbpf.

gcc/testsuite/ChangeLog

* lib/target-supports.exp (check_effective_target_alloca): BPF
target does not support alloca.
* gcc.target/bpf/diag-alloca-1.c: New test.
* gcc.target/bpf/diag-alloca-2.c: Likewise.
* gcc.target/bpf/xbpf-callee-saved-regs-1.c: Remove test.
* gcc.target/bpf/xbpf-callee-saved-regs-2.c: Likewise.
* gcc.target/bpf/regs-availability-1.c: Likewise.
---
 gcc/config/bpf/bpf.cc | 128 ++
 gcc/config/bpf/bpf.h  |  23 ++--
 gcc/config/bpf/bpf.md |  13 ++
 gcc/testsuite/gcc.target/bpf/diag-alloca-1.c  |   9 ++
 gcc/testsuite/gcc.target/bpf/diag-alloca-2.c  |   9 ++
 .../gcc.target/bpf/regs-availability-1.c  |  21 +++
 .../gcc.target/bpf/xbpf-callee-saved-regs-1.c |  17 ---
 .../gcc.target/bpf/xbpf-callee-saved-regs-2.c |  17 ---
 gcc/testsuite/lib/target-supports.exp |   3 +
 9 files changed, 82 insertions(+), 158 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/diag-alloca-1.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/diag-alloca-2.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/regs-availability-1.c
 delete mode 100644 gcc/testsuite/gcc.target/bpf/xbpf-callee-saved-regs-1.c
 delete mode 100644 gcc/testsuite/gcc.target/bpf/xbpf-callee-saved-regs-2.c

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index d27a971d0af..3516b79bce4 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -76,10 +76,6 @@ struct GTY(()) machine_function
 {
   /* Number of bytes saved on the stack for local variables.  */
   int local_vars_size;
-
-  /* Number of bytes saved on the stack for callee-saved
- registers.  */
-  int callee_saved_reg_size;
 };
 
 /* Handle an attribute requiring a FUNCTION_DECL;
@@ -346,7 +342,7 @@ static void
 bpf_compute_frame_layout (void)
 {
   int stack_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
-  int padding_locals, regno;
+  int padding_locals;
 
   /* Set the space used in the stack by local variables.  This is
  rounded up to respect the minimum stack alignment.  */
@@ -358,23 +354,9 @@ bpf_compute_frame_layout (void)
 
   cfun->machine->local_vars_size += padding_locals;
 
-  if (TARGET_XBPF)
-{
-  /* Set the space used in the stack by callee-saved used
-registers in the current function.  There is no need to round
-up, since the registers are all 8 bytes wide.  */
-  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-   if ((df_regs_ever_live_p (regno)
-&& !call_used_or_fixed_reg_p (regno))
-   || (cfun->calls_alloca
-   && regno == STACK_POINTER_REGNUM))
- cfun->machine->callee_saved_reg_size += 8;
-}
-
   /* Check that the total size of the frame doesn't exceed the limit
  imposed by eBPF.  */
-  if ((cfun->machine->local_vars_size
-   + cfun->machine->callee_saved_reg_size) > bpf_frame_limit)
+  if (cfun->machine->local_vars_size > bpf_frame_limit)
 {
   static int stack_limit_exceeded = 0;
 
@@ -393,69 +375,19 @@ bpf_compute_frame_layout (void)
 void
 bpf_expand_prologue (void)
 {
-  HOST_WIDE_INT size;
-
-  size = (cfun->machine->local_vars_size
- + cfun->machine->callee_saved_reg_size);
-
   /* The BPF "hardware" provides a fresh new set of registers for each
  called function, some of which are initialized to the values of
  the arguments passed in the first five registers.  In doing so,
- it saves the values of the registers of the caller, and restored
+ it saves the values of the registers of the caller, and 

[COMMITTED] bpf: allow exceeding max num of args in BPF when always_inline

2023-08-11 Thread Jose E. Marchesi via Gcc-patches
BPF currently limits the number of registers used to pass arguments to
functions to five registers.  There is a check for this at function
expansion time.  However, if a function is guaranteed to be always
inlined (and its body never generated) by virtue of the always_inline
attribute, it can "receive" any number of arguments.

Tested in host x86_64-linux-gnu and target bpf-unknown-none.

gcc/ChangeLog

* config/bpf/bpf.cc (bpf_function_arg_advance): Do not complain
about too many arguments if function is always inlined.

gcc/testsuite/ChangeLog

* gcc.target/bpf/diag-funargs-inline-1.c: New test.
* gcc.target/bpf/diag-funargs.c: Adapt test.
---
 gcc/config/bpf/bpf.cc |  9 +++-
 .../gcc.target/bpf/diag-funargs-inline-1.c| 21 +++
 gcc/testsuite/gcc.target/bpf/diag-funargs.c   |  8 ++-
 3 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/diag-funargs-inline-1.c

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 33218b3a818..d27a971d0af 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -732,7 +732,14 @@ bpf_function_arg_advance (cumulative_args_t ca,
   unsigned num_words = CEIL (num_bytes, UNITS_PER_WORD);
 
   if (*cum <= 5 && *cum + num_words > 5)
-error ("too many function arguments for eBPF");
+{
+  /* Too many arguments for BPF.  However, if the function is
+ gonna be inline for sure, we let it pass.  Otherwise, issue
+ an error.  */
+  if (!lookup_attribute ("always_inline",
+ DECL_ATTRIBUTES (cfun->decl)))
+error ("too many function arguments for eBPF");
+}
 
   *cum += num_words;
 }
diff --git a/gcc/testsuite/gcc.target/bpf/diag-funargs-inline-1.c 
b/gcc/testsuite/gcc.target/bpf/diag-funargs-inline-1.c
new file mode 100644
index 000..e917ef1294e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/diag-funargs-inline-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+inline int __attribute__ ((always_inline))
+foo (int a1,
+ int a2,
+ int a3,
+ int a4,
+ int a5,
+ int a6)
+{
+  return a1 + a2 + a3 + a4 + a5 + a6;
+}
+
+int
+bar (int i1, int i2, int i3, int i4, int i5)
+{
+  return foo (i1, i2, i3, i4, i5, 10);
+}
+
+/* { dg-final { scan-assembler-not "call\t.*" } } */
diff --git a/gcc/testsuite/gcc.target/bpf/diag-funargs.c 
b/gcc/testsuite/gcc.target/bpf/diag-funargs.c
index d4e9c0683f2..42b5f05b67c 100644
--- a/gcc/testsuite/gcc.target/bpf/diag-funargs.c
+++ b/gcc/testsuite/gcc.target/bpf/diag-funargs.c
@@ -11,5 +11,11 @@ foo (int a1,  /* { dg-error "too many function arguments" } 
*/
  int a5,
  int a6)
 {
-  return a6;
+  return a1 + a2 + a3 + a4 + a5 + a6;
+}
+
+int
+bar (int i1, int i2, int i3, int i4, int i5)
+{
+  return foo (i1, i2, i3, i4, i5, 10);
 }
-- 
2.30.2



Re: [PATCH] bpf: Fixed GC mistakes in BPF builtins code.

2023-08-08 Thread Jose E. Marchesi via Gcc-patches


Hi Cuper.

OK.  Hopefully all the roots are marked now to avoid these nodes being
collected.

Thanks.

> Hi everyone,
>
> This patch fixes BPF CO-RE builtins support that missed information for
> garbage collector (GC).
>
> The BPF CO-RE implementation defines several data structures that keep
> builtin information throughout all of the compilation flow aside from
> code.  This intentionally avoids having the builtin calls arguments
> expressions/enum/types tree nodes within the compiling code in order to
> prevent the compiler from optimizing those away, based on information in
> the current compilation unit.
> CO-RE builtins are target kernel specific and very little can be inferred
> from type information within the compilation unit.
>
> Fault was triggered when attempting to compile some BPF kernel big
> examples that revealed the lack of GC information.
>
> Patch also removes some spurious includes of header files.
>
> Best regards,
> Cupertino
>
>
>
> commit c71b5c604189d04664c5b5ee155326fa4b79808b
> Author: Cupertino Miranda 
> Date:   Tue Aug 8 11:12:00 2023 +0100
>
> bpf: Fixed GC mistakes in BPF builtins code.
> 
> This patch fixes problems with GC within the CO-RE builtins
> implementation.
> List of included headers was also revised.
> 
> gcc/ChangeLog:
> 
> * config/bpf/core-builtins.cc: Cleaned include headers.
> (struct cr_builtins): Added GTY.
> (cr_builtins_ref): Created.
> (builtins_data): Changed to GC root.
> (allocate_builtin_data): Changed.
> Included gt-core-builtins.h.
> * config/bpf/coreout.cc (bpf_core_extra): Added GTY.
> (bpf_core_extra_ref): Created.
> (bpf_comment_info): Changed to GC root.
> (bpf_core_reloc_add, output_btfext_header, btf_ext_init): Changed.
>
> diff --git a/gcc/config/bpf/core-builtins.cc b/gcc/config/bpf/core-builtins.cc
> index 575e63d8ea77..c3222b4c7804 100644
> --- a/gcc/config/bpf/core-builtins.cc
> +++ b/gcc/config/bpf/core-builtins.cc
> @@ -22,52 +22,23 @@ along with GCC; see the file COPYING3.  If not see
>  #include "config.h"
>  #include "system.h"
>  #include "coretypes.h"
> -#include "tm.h"
> +#include "target.h"
>  #include "rtl.h"
> -#include "regs.h"
> -#include "insn-config.h"
> -#include "insn-attr.h"
> -#include "recog.h"
>  #include "output.h"
> -#include "alias.h"
>  #include "tree.h"
>  #include "stringpool.h"
>  #include "attribs.h"
> -#include "varasm.h"
> -#include "stor-layout.h"
> -#include "calls.h"
>  #include "function.h"
> -#include "explow.h"
>  #include "memmodel.h"
>  #include "emit-rtl.h"
> -#include "reload.h"
> -#include "tm_p.h"
> -#include "target.h"
> -#include "basic-block.h"
>  #include "expr.h"
> -#include "optabs.h"
> -#include "bitmap.h"
> -#include "df.h"
> -#include "c-family/c-common.h"
>  #include "diagnostic.h"
> -#include "builtins.h"
> -#include "predict.h"
>  #include "langhooks.h"
> -#include "flags.h"
> -
> -#include "cfg.h"
> +#include "basic-block.h"
>  #include "gimple.h"
>  #include "gimple-iterator.h"
>  #include "gimple-walk.h"
>  #include "tree-pass.h"
> -#include "tree-iterator.h"
> -
> -#include "context.h"
> -#include "pass_manager.h"
> -
> -#include "gimplify.h"
> -#include "gimplify-me.h"
> -
>  #include "plugin.h"
>  
>  #include "ctfc.h"
> @@ -159,37 +130,41 @@ along with GCC; see the file COPYING3.  If not see
>  as a builtin.  */
>  
>  
> -struct cr_builtins
> +struct GTY(()) cr_builtins
>  {
>tree type;
>tree expr;
>tree default_value;
>rtx rtx_default_value;
> -  enum btf_core_reloc_kind kind; /* Recovered from proper argument.  */
> +  enum btf_core_reloc_kind kind;
>enum bpf_builtins orig_builtin_code;
>tree orig_arg_expr;
>  };
> +typedef struct cr_builtins *cr_builtins_ref;
>  
>  #define CORE_BUILTINS_DATA_EMPTY \
>{ NULL_TREE, NULL_TREE, NULL_TREE, NULL_RTX, BPF_RELO_INVALID, \
>  BPF_BUILTIN_UNUSED, NULL }
>  
>  /* Vector definition and its access function.  */
> -vec builtins_data;
> +static GTY(()) vec *builtins_data = NULL;
>  
>  static inline int
>  allocate_builtin_data ()
>  {
> -  struct cr_builtins data = CORE_BUILTINS_DATA_EMPTY;
> -  int ret = builtins_data.length ();
> -  builtins_data.safe_push (data);
> +  if (builtins_data == NULL)
> +vec_alloc (builtins_data, 1);
> +
> +  cr_builtins_ref data = ggc_cleared_alloc ();
> +  int ret = builtins_data->length ();
> +  vec_safe_push (builtins_data, data);
>return ret;
>  }
>  
>  static inline struct cr_builtins *
>  get_builtin_data (int index)
>  {
> -  return _data[index];
> +  return (*builtins_data)[index];
>  }
>  
>  typedef bool
> @@ -200,11 +175,12 @@ search_builtin_data (builtin_local_data_compare_fn 
> callback,
>struct cr_builtins *elem)
>  {
>unsigned int i;
> -  for (i = 0; i < builtins_data.length (); i++)
> -if ((callback != NULL && (callback) (elem, _data[i]))
> -   || (callback == NULL
> - 

Re: [v2 PATCH 2/2] bpf: CO-RE builtins support tests.

2023-08-03 Thread Jose E. Marchesi via Gcc-patches


OK.
Thanks.

> Hi,
>
> Resending this patch since I have noticed I had a testcase added in
> previous patch. Makes more sense here.
>
> Thanks,
> Cupertino
>
> From 334e9ae0f428f6573f2a5e8a3067a4d181b8b9c5 Mon Sep 17 00:00:00 2001
> From: Cupertino Miranda 
> Date: Thu, 27 Jul 2023 18:05:22 +0100
> Subject: [PATCH v2 2/2] bpf: CO-RE builtins support tests.
>
> This patch adds tests for the following builtins:
>   __builtin_preserve_enum_value
>   __builtin_btf_type_id
>   __builtin_preserve_type_info
> ---
>  .../gcc.target/bpf/core-builtin-enumvalue.c   |  52 +
>  .../bpf/core-builtin-enumvalue_errors.c   |  22 
>  .../bpf/core-builtin-enumvalue_opt.c  |  35 ++
>  ...core-builtin-fieldinfo-const-elimination.c |  29 +
>  .../bpf/core-builtin-fieldinfo-errors-1.c |   2 +-
>  .../bpf/core-builtin-fieldinfo-errors-2.c |   2 +-
>  .../gcc.target/bpf/core-builtin-type-based.c  |  58 ++
>  .../gcc.target/bpf/core-builtin-type-id.c |  40 +++
>  gcc/testsuite/gcc.target/bpf/core-support.h   | 109 ++
>  9 files changed, 347 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue.c
>  create mode 100644 
> gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue_errors.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue_opt.c
>  create mode 100644 
> gcc/testsuite/gcc.target/bpf/core-builtin-fieldinfo-const-elimination.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-builtin-type-based.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-builtin-type-id.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-support.h
>
> diff --git a/gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue.c 
> b/gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue.c
> new file mode 100644
> index ..3e3334dc089a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue.c
> @@ -0,0 +1,52 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -dA -gbtf -mco-re" } */
> +
> +#include "core-support.h"
> +
> +extern int *v;
> +
> +int foo(void *data)
> +{
> + int i = 0;
> + enum named_ue64 named_unsigned64 = 0;
> + enum named_se64 named_signed64 = 0;
> + enum named_ue named_unsigned = 0;
> + enum named_se named_signed = 0;
> +
> + v[i++] = bpf_core_enum_value_exists (named_unsigned64, UE64_VAL1);
> + v[i++] = bpf_core_enum_value_exists (enum named_ue64, UE64_VAL2);
> + v[i++] = bpf_core_enum_value_exists (enum named_ue64, UE64_VAL3);
> + v[i++] = bpf_core_enum_value_exists (named_signed64, SE64_VAL1);
> + v[i++] = bpf_core_enum_value_exists (enum named_se64, SE64_VAL2);
> + v[i++] = bpf_core_enum_value_exists (enum named_se64, SE64_VAL3);
> +
> + v[i++] = bpf_core_enum_value (named_unsigned64, UE64_VAL1);
> + v[i++] = bpf_core_enum_value (named_unsigned64, UE64_VAL2);
> + v[i++] = bpf_core_enum_value (named_signed64, SE64_VAL1);
> + v[i++] = bpf_core_enum_value (named_signed64, SE64_VAL2);
> +
> + v[i++] = bpf_core_enum_value_exists (named_unsigned, UE_VAL1);
> + v[i++] = bpf_core_enum_value_exists (enum named_ue, UE_VAL2);
> + v[i++] = bpf_core_enum_value_exists (enum named_ue, UE_VAL3);
> + v[i++] = bpf_core_enum_value_exists (named_signed, SE_VAL1);
> + v[i++] = bpf_core_enum_value_exists (enum named_se, SE_VAL2);
> + v[i++] = bpf_core_enum_value_exists (enum named_se, SE_VAL3);
> +
> + v[i++] = bpf_core_enum_value (named_unsigned, UE_VAL1);
> + v[i++] = bpf_core_enum_value (named_unsigned, UE_VAL2);
> + v[i++] = bpf_core_enum_value (named_signed, SE_VAL1);
> + v[i++] = bpf_core_enum_value (named_signed, SE_VAL2);
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "\t.4byte\t0x8\t; bpfcr_type \\(named_ue64\\)" 5 } } */
> +/* { dg-final { scan-assembler-times "\t.4byte\t0x9\t; bpfcr_type \\(named_se64\\)" 5} } */
> +/* { dg-final { scan-assembler-times "\t.4byte\t0xb\t; bpfcr_type \\(named_ue\\)" 5 } } */
> +/* { dg-final { scan-assembler-times "\t.4byte\t0xc\t; bpfcr_type \\(named_se\\)" 5} } */
> +/* { dg-final { scan-assembler-times "\t.4byte\t0xa\t; bpfcr_kind" 12 } } BPF_ENUMVAL_EXISTS */
> +/* { dg-final { scan-assembler-times "\t.4byte\t0xb\t; bpfcr_kind" 8 } } BPF_ENUMVAL_VALUE */
> +
> +/* { dg-final { scan-assembler-times "bpfcr_astr_off \\(\"0\"\\)" 8 } } */
> +/* { dg-final { scan-assembler-times "bpfcr_astr_off \\(\"1\"\\)" 8 } } */
> +/* { dg-final { scan-assembler-times "bpfcr_astr_off \\(\"2\"\\)" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue_errors.c 
> b/gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue_errors.c
> new file mode 100644
> index ..138e99895160
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/core-builtin-enumvalue_errors.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -dA -gbtf -mco-re" } */
> +
> +#include "core-support.h"
> +
> +extern int *v;
> +
> +unsigned long foo(void *data)
> +{
> +  int i = 0;
> +  enum named_ue64 

Re: [v2 PATCH 1/2] bpf: Implementation of BPF CO-RE builtins

2023-08-03 Thread Jose E. Marchesi via Gcc-patches


Ok.
Thanks!

> From fda9603ded735205b6e20fc5b65a04f8d15685e6 Mon Sep 17 00:00:00 2001
> From: Cupertino Miranda 
> Date: Thu, 6 Apr 2023 15:22:48 +0100
> Subject: [PATCH v2 1/2] bpf: Implementation of BPF CO-RE builtins
>
> This patch updates the support for the BPF CO-RE builtins
> __builtin_preserve_access_index and __builtin_preserve_field_info,
> and adds support for the CO-RE builtins __builtin_btf_type_id,
> __builtin_preserve_type_info and __builtin_preserve_enum_value.
>
> These CO-RE relocations are now converted to __builtin_core_reloc which
> abstracts all of the original builtins in a polymorphic relocation
> specific builtin.
>
> The builtin processing is now split in 2 stages, the first (pack) is
> executed right after the front-end and the second (process) right before
> the asm output.
>
> In expand pass the __builtin_core_reloc is converted to a
> unspec:UNSPEC_CORE_RELOC rtx entry.
>
> The data required to process the builtin is now collected in the packing
> stage (after front-end), not allowing the compiler to optimize any of
> the relevant information required to compose the relocation when
> necessary.
> At expansion, that information is recovered and CTF/BTF is queried to
> construct the information that will be used in the relocation.
> At this point the relocation is added to specific section and the
> builtin is expanded to the expected default value for the builtin.
>
> In order to process __builtin_preserve_enum_value, it was necessary to
> hook the front-end to collect the original enum value reference.
> This is needed since the parser folds all the enum values to its
> integer_cst representation.
>
> More details can be found within the core-builtins.cc.
>
> Regtested in host x86_64-linux-gnu and target bpf-unknown-none.
> ---
>  gcc/config.gcc  |4 +-
>  gcc/config/bpf/bpf-passes.def   |   20 -
>  gcc/config/bpf/bpf-protos.h |4 +-
>  gcc/config/bpf/bpf.cc   |  806 ++
>  gcc/config/bpf/bpf.md   |   17 +
>  gcc/config/bpf/core-builtins.cc | 1394 +++
>  gcc/config/bpf/core-builtins.h  |   35 +
>  gcc/config/bpf/coreout.cc   |   50 +-
>  gcc/config/bpf/coreout.h|   13 +-
>  gcc/config/bpf/t-bpf|6 +-
>  gcc/doc/extend.texi |   51 ++
>  11 files changed, 1595 insertions(+), 805 deletions(-)
>  delete mode 100644 gcc/config/bpf/bpf-passes.def
>  create mode 100644 gcc/config/bpf/core-builtins.cc
>  create mode 100644 gcc/config/bpf/core-builtins.h
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index eba69a463be0..c521669e78b1 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -1597,8 +1597,8 @@ bpf-*-*)
>  use_collect2=no
>  extra_headers="bpf-helpers.h"
>  use_gcc_stdint=provide
> -extra_objs="coreout.o"
> -target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/coreout.cc"
> +extra_objs="coreout.o core-builtins.o"
> +target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/coreout.cc 
> \$(srcdir)/config/bpf/core-builtins.cc"
>  ;;
>  cris-*-elf | cris-*-none)
>   tm_file="elfos.h newlib-stdint.h ${tm_file}"
> diff --git a/gcc/config/bpf/bpf-passes.def b/gcc/config/bpf/bpf-passes.def
> deleted file mode 100644
> index deeaee988a01..
> --- a/gcc/config/bpf/bpf-passes.def
> +++ /dev/null
> @@ -1,20 +0,0 @@
> -/* Declaration of target-specific passes for eBPF.
> -   Copyright (C) 2021-2023 Free Software Foundation, Inc.
> -
> -   This file is part of GCC.
> -
> -   GCC is free software; you can redistribute it and/or modify it
> -   under the terms of the GNU General Public License as published by
> -   the Free Software Foundation; either version 3, or (at your option)
> -   any later version.
> -
> -   GCC is distributed in the hope that it will be useful, but
> -   WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   General Public License for more details.
> -
> -   You should have received a copy of the GNU General Public License
> -   along with GCC; see the file COPYING3.  If not see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -INSERT_PASS_AFTER (pass_df_initialize_opt, 1, pass_bpf_core_attr);
> diff --git a/gcc/config/bpf/bpf-protos.h b/gcc/config/bpf/bpf-protos.h
> index b484310e8cbf..fbe0d8a0213f 100644
> --- a/gcc/config/bpf/bpf-protos.h
> +++ b/gcc/config/bpf/bpf-protos.h
> @@ -30,7 +30,7 @@ extern void bpf_print_operand_address (FILE *, rtx);
>  extern void bpf_expand_prologue (void);
>  extern void bpf_expand_epilogue (void);
>  extern void bpf_expand_cbranch (machine_mode, rtx *);
> -
> -rtl_opt_pass * make_pass_bpf_core_attr (gcc::context *);
> +const char *bpf_add_core_reloc (rtx *operands, const char *templ);
> +void bpf_replace_core_move_operands (rtx *operands);
>  
>  #endif /* ! GCC_BPF_PROTOS_H */
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> 

Re: [PATCH 1/2] bpf: Implementation of BPF CO-RE builtins

2023-08-03 Thread Jose E. Marchesi via Gcc-patches


> Jose E. Marchesi writes:
>
>>> This patch updates the support for the BPF CO-RE builtins
>>> __builtin_preserve_access_index and __builtin_preserve_field_info,
>>> and adds support for the CO-RE builtins __builtin_btf_type_id,
>>> __builtin_preserve_type_info and __builtin_preserve_enum_value.
>>>
>>> These CO-RE relocations are now converted to __builtin_core_reloc which
>>> abstracts all of the original builtins in a polymorphic relocation
>>> specific builtin.
>>>
>>> The builtin processing is now split in 2 stages, the first (pack) is
>>> executed right after the front-end and the second (process) right before
>>> the asm output.
>>>
>>> In expand pass the __builtin_core_reloc is converted to a
>>> unspec:UNSPEC_CORE_RELOC rtx entry.
>>>
>>> The data required to process the builtin is now collected in the packing
>>> stage (after front-end), not allowing the compiler to optimize any of
>>> the relevant information required to compose the relocation when
>>> necessary.
>>> At expansion, that information is recovered and CTF/BTF is queried to
>>> construct the information that will be used in the relocation.
>>> At this point the relocation is added to specific section and the
>>> builtin is expanded to the expected default value for the builtin.
>>>
>>> In order to process __builtin_preserve_enum_value, it was necessary to
>>> hook the front-end to collect the original enum value reference.
>>> This is needed since the parser folds all the enum values to its
>>> integer_cst representation.
>>>
>>> More details can be found within the core-builtins.cc.
>>>
>>> Regtested in host x86_64-linux-gnu and target bpf-unknown-none.
>>> ---
>>>  gcc/config.gcc|4 +-
>>>  gcc/config/bpf/bpf-passes.def |   20 -
>>>  gcc/config/bpf/bpf-protos.h   |4 +-
>>>  gcc/config/bpf/bpf.cc |  817 +-
>>>  gcc/config/bpf/bpf.md |   17 +
>>>  gcc/config/bpf/core-builtins.cc   | 1397 +
>>>  gcc/config/bpf/core-builtins.h|   36 +
>>>  gcc/config/bpf/coreout.cc |   50 +-
>>>  gcc/config/bpf/coreout.h  |   13 +-
>>>  gcc/config/bpf/t-bpf  |6 +-
>>>  gcc/doc/extend.texi   |   51 +
>>>  ...core-builtin-fieldinfo-const-elimination.c |   29 +
>>>  12 files changed, 1639 insertions(+), 805 deletions(-)
>>>  delete mode 100644 gcc/config/bpf/bpf-passes.def
>>>  create mode 100644 gcc/config/bpf/core-builtins.cc
>>>  create mode 100644 gcc/config/bpf/core-builtins.h
>>>  create mode 100644 
>>> gcc/testsuite/gcc.target/bpf/core-builtin-fieldinfo-const-elimination.c
>>>
>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>>> index eba69a463be0..c521669e78b1 100644
>>> --- a/gcc/config.gcc
>>> +++ b/gcc/config.gcc
>>> @@ -1597,8 +1597,8 @@ bpf-*-*)
>>>  use_collect2=no
>>>  extra_headers="bpf-helpers.h"
>>>  use_gcc_stdint=provide
>>> -extra_objs="coreout.o"
>>> -target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/coreout.cc"
>>> +extra_objs="coreout.o core-builtins.o"
>>> +target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/coreout.cc 
>>> \$(srcdir)/config/bpf/core-builtins.cc"
>>>  ;;
>>>  cris-*-elf | cris-*-none)
>>> tm_file="elfos.h newlib-stdint.h ${tm_file}"
>>> diff --git a/gcc/config/bpf/bpf-passes.def b/gcc/config/bpf/bpf-passes.def
>>> deleted file mode 100644
>>> index deeaee988a01..
>>> --- a/gcc/config/bpf/bpf-passes.def
>>> +++ /dev/null
>>> @@ -1,20 +0,0 @@
>>> -/* Declaration of target-specific passes for eBPF.
>>> -   Copyright (C) 2021-2023 Free Software Foundation, Inc.
>>> -
>>> -   This file is part of GCC.
>>> -
>>> -   GCC is free software; you can redistribute it and/or modify it
>>> -   under the terms of the GNU General Public License as published by
>>> -   the Free Software Foundation; either version 3, or (at your option)
>>> -   any later version.
>>> -
>>> -   GCC is distributed in the hope that it will be useful, but
>>> -   WITHOUT ANY WARRANTY; without even the implied warranty of
>>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> -   General Public License for more details.
>>> -
>>> -   You should have received a copy of the GNU General Public License
>>> -   along with GCC; see the file COPYING3.  If not see
>>> -   <http://www.gnu.org/licenses/>.  */
>>> -
>>> -INSERT_PASS_AFTER (pass_df_initialize_opt, 1, pass_bpf_core_attr);
>>> diff --git a/gcc/config/bpf/bpf-protos.h b/gcc/config/bpf/bpf-protos.h
>>> index b484310e8cbf..fbcf5111eb21 100644
>>> --- a/gcc/config/bpf/bpf-protos.h
>>> +++ b/gcc/config/bpf/bpf-protos.h
>>> @@ -30,7 +30,7 @@ extern void bpf_print_operand_address (FILE *, rtx);
>>>  extern void bpf_expand_prologue (void);
>>>  extern void bpf_expand_epilogue (void);
>>>  extern void bpf_expand_cbranch (machine_mode, rtx 

Re: [PATCH 1/2] bpf: Implementation of BPF CO-RE builtins

2023-08-03 Thread Jose E. Marchesi via Gcc-patches


> This patch updates the support for the BPF CO-RE builtins
> __builtin_preserve_access_index and __builtin_preserve_field_info,
> and adds support for the CO-RE builtins __builtin_btf_type_id,
> __builtin_preserve_type_info and __builtin_preserve_enum_value.
>
> These CO-RE relocations are now converted to __builtin_core_reloc which
> abstracts all of the original builtins in a polymorphic relocation
> specific builtin.
>
> The builtin processing is now split in 2 stages, the first (pack) is
> executed right after the front-end and the second (process) right before
> the asm output.
>
> In expand pass the __builtin_core_reloc is converted to a
> unspec:UNSPEC_CORE_RELOC rtx entry.
>
> The data required to process the builtin is now collected in the packing
> stage (after front-end), not allowing the compiler to optimize any of
> the relevant information required to compose the relocation when
> necessary.
> At expansion, that information is recovered and CTF/BTF is queried to
> construct the information that will be used in the relocation.
> At this point the relocation is added to specific section and the
> builtin is expanded to the expected default value for the builtin.
>
> In order to process __builtin_preserve_enum_value, it was necessary to
> hook the front-end to collect the original enum value reference.
> This is needed since the parser folds all the enum values to its
> integer_cst representation.
>
> More details can be found within the core-builtins.cc.
>
> Regtested in host x86_64-linux-gnu and target bpf-unknown-none.
> ---
>  gcc/config.gcc|4 +-
>  gcc/config/bpf/bpf-passes.def |   20 -
>  gcc/config/bpf/bpf-protos.h   |4 +-
>  gcc/config/bpf/bpf.cc |  817 +-
>  gcc/config/bpf/bpf.md |   17 +
>  gcc/config/bpf/core-builtins.cc   | 1397 +
>  gcc/config/bpf/core-builtins.h|   36 +
>  gcc/config/bpf/coreout.cc |   50 +-
>  gcc/config/bpf/coreout.h  |   13 +-
>  gcc/config/bpf/t-bpf  |6 +-
>  gcc/doc/extend.texi   |   51 +
>  ...core-builtin-fieldinfo-const-elimination.c |   29 +
>  12 files changed, 1639 insertions(+), 805 deletions(-)
>  delete mode 100644 gcc/config/bpf/bpf-passes.def
>  create mode 100644 gcc/config/bpf/core-builtins.cc
>  create mode 100644 gcc/config/bpf/core-builtins.h
>  create mode 100644 
> gcc/testsuite/gcc.target/bpf/core-builtin-fieldinfo-const-elimination.c
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index eba69a463be0..c521669e78b1 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -1597,8 +1597,8 @@ bpf-*-*)
>  use_collect2=no
>  extra_headers="bpf-helpers.h"
>  use_gcc_stdint=provide
> -extra_objs="coreout.o"
> -target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/coreout.cc"
> +extra_objs="coreout.o core-builtins.o"
> +target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/coreout.cc 
> \$(srcdir)/config/bpf/core-builtins.cc"
>  ;;
>  cris-*-elf | cris-*-none)
>   tm_file="elfos.h newlib-stdint.h ${tm_file}"
> diff --git a/gcc/config/bpf/bpf-passes.def b/gcc/config/bpf/bpf-passes.def
> deleted file mode 100644
> index deeaee988a01..
> --- a/gcc/config/bpf/bpf-passes.def
> +++ /dev/null
> @@ -1,20 +0,0 @@
> -/* Declaration of target-specific passes for eBPF.
> -   Copyright (C) 2021-2023 Free Software Foundation, Inc.
> -
> -   This file is part of GCC.
> -
> -   GCC is free software; you can redistribute it and/or modify it
> -   under the terms of the GNU General Public License as published by
> -   the Free Software Foundation; either version 3, or (at your option)
> -   any later version.
> -
> -   GCC is distributed in the hope that it will be useful, but
> -   WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   General Public License for more details.
> -
> -   You should have received a copy of the GNU General Public License
> -   along with GCC; see the file COPYING3.  If not see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -INSERT_PASS_AFTER (pass_df_initialize_opt, 1, pass_bpf_core_attr);
> diff --git a/gcc/config/bpf/bpf-protos.h b/gcc/config/bpf/bpf-protos.h
> index b484310e8cbf..fbcf5111eb21 100644
> --- a/gcc/config/bpf/bpf-protos.h
> +++ b/gcc/config/bpf/bpf-protos.h
> @@ -30,7 +30,7 @@ extern void bpf_print_operand_address (FILE *, rtx);
>  extern void bpf_expand_prologue (void);
>  extern void bpf_expand_epilogue (void);
>  extern void bpf_expand_cbranch (machine_mode, rtx *);
> -
> -rtl_opt_pass * make_pass_bpf_core_attr (gcc::context *);
> +const char *bpf_add_core_reloc (rtx *operands, const char *templ);
> +void bpf_process_move_operands (rtx *operands);
>  
>  #endif /* ! GCC_BPF_PROTOS_H */
> diff 

[COMMITTED] bpf: disable tail call optimization in BPF targets

2023-07-28 Thread Jose E. Marchesi via Gcc-patches
clang disables tail call optimizations in BPF targets.  Do the same in
GCC.

gcc/ChangeLog:

* config/bpf/bpf.cc (bpf_option_override): Disable tail-call
optimizations in BPF target.
---
 gcc/config/bpf/bpf.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index b5b5674edbb..57817cdf2f8 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -273,6 +273,9 @@ bpf_option_override (void)
  "on this architecture");
   flag_stack_protect = 0;
 }
+
+  /* The BPF target does not support tail call optimization.  */
+  flag_optimize_sibling_calls = 0;
 }
 
 #undef TARGET_OPTION_OVERRIDE
-- 
2.30.2



Re: [PATCH] bpf: ISA V4 sign-extending move and load insns [PR110782,PR110784]

2023-07-27 Thread Jose E. Marchesi via Gcc-patches


Hi David.
Thanks for the patch.

> BPF ISA V4 introduces sign-extending move and load operations.  This
> patch makes the BPF backend generate those instructions, when enabled
> and useful.
>
> A new option, -m[no-]smov gates generation of these instructions, and is
> enabled by default for -mcpu=v4 and above.  Tests for the new
> instructions and documentation for the new options are included.
>
> Tested on bpf-unknown-none.
> OK?
>
> gcc/
>
>   * config/bpf/bpf.opt (msmov): New option.
>   * config/bpf/bpf.cc (bpf_option_override): Handle it here.
>   * config/bpf/bpf.md (*extendsidi2): New.
>   (extendhidi2): New.
>   (extendqidi2): New.
>   (extendsisi2): New.
>   (extendhisi2): New.
>   (extendqisi2): New.
>   * doc/invoke.texi (Option Summary): Add -msmov eBPF option.
>   (eBPF Options): Add -m[no-]smov.  Document that -mcpu=v4
>   also enables -msmov.
>
> gcc/testsuite/
>
>   * gcc.target/bpf/sload-1.c: New test.
>   * gcc.target/bpf/sload-pseudoc-1.c: New test.
>   * gcc.target/bpf/smov-1.c: New test.
>   * gcc.target/bpf/smov-pseudoc-1.c: New test.

Looks like you forgot to mention the bugzilla PR in the changelog
entries.  Would be nice to have them there so automatic updates happen
in the bugzillas.

Other than that, OK.
Thanks!

> ---
>  gcc/config/bpf/bpf.cc |  3 ++
>  gcc/config/bpf/bpf.md | 50 +++
>  gcc/config/bpf/bpf.opt|  4 ++
>  gcc/doc/invoke.texi   |  9 +++-
>  gcc/testsuite/gcc.target/bpf/sload-1.c| 16 ++
>  .../gcc.target/bpf/sload-pseudoc-1.c  | 16 ++
>  gcc/testsuite/gcc.target/bpf/smov-1.c | 18 +++
>  gcc/testsuite/gcc.target/bpf/smov-pseudoc-1.c | 18 +++
>  8 files changed, 133 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/sload-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/sload-pseudoc-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/smov-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/smov-pseudoc-1.c
>
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index 0e07b416add..b5b5674edbb 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -262,6 +262,9 @@ bpf_option_override (void)
>if (bpf_has_sdiv == -1)
>  bpf_has_sdiv = (bpf_isa >= ISA_V4);
>  
> +  if (bpf_has_smov == -1)
> +bpf_has_smov = (bpf_isa >= ISA_V4);
> +
>/* Disable -fstack-protector as it is not supported in BPF.  */
>if (flag_stack_protect)
>  {
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index 66436397bb7..a69a239b9d6 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -307,6 +307,56 @@ (define_expand "extendsidi2"
>DONE;
>  })
>  
> +;; ISA V4 introduces sign-extending move and load operations.
> +
> +(define_insn "*extendsidi2"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r")
> +(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,q")))]
> +  "bpf_has_smov"
> +  "@
> +   {movs\t%0,%1,32|%0 = (s32) %1}
> +   {ldxsw\t%0,%1|%0 = *(s32 *) (%1)}"
> +  [(set_attr "type" "alu,ldx")])
> +
> +(define_insn "extendhidi2"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r")
> +(sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,q")))]
> +  "bpf_has_smov"
> +  "@
> +   {movs\t%0,%1,16|%0 = (s16) %1}
> +   {ldxsh\t%0,%1|%0 = *(s16 *) (%1)}"
> +  [(set_attr "type" "alu,ldx")])
> +
> +(define_insn "extendqidi2"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r")
> +(sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "r,q")))]
> +  "bpf_has_smov"
> +  "@
> +   {movs\t%0,%1,8|%0 = (s8) %1}
> +   {ldxsb\t%0,%1|%0 = *(s8 *) (%1)}"
> +  [(set_attr "type" "alu,ldx")])
> +
> +(define_insn "extendsisi2"
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> +(sign_extend:SI (match_operand:SI 1 "register_operand" "r")))]
> +  "bpf_has_smov"
> +  "{movs32\t%0,%1,32|%w0 = (s32) %w1}"
> +  [(set_attr "type" "alu")])
> +
> +(define_insn "extendhisi2"
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> +(sign_extend:SI (match_operand:HI 1 "register_operand" "r")))]
> +  "bpf_has_smov"
> +  "{movs32\t%0,%1,16|%w0 = (s16) %w1}"
> +  [(set_attr "type" "alu")])
> +
> +(define_insn "extendqisi2"
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> +(sign_extend:SI (match_operand:QI 1 "register_operand" "r")))]
> +  "bpf_has_smov"
> +  "{movs32\t%0,%1,8|%w0 = (s8) %w1}"
> +  [(set_attr "type" "alu")])
> +
>   Data movement
>  
>  (define_mode_iterator MM [QI HI SI DI SF DF])
> diff --git a/gcc/config/bpf/bpf.opt b/gcc/config/bpf/bpf.opt
> index b21cfcab9ea..8e240d397e4 100644
> --- a/gcc/config/bpf/bpf.opt
> +++ b/gcc/config/bpf/bpf.opt
> @@ -71,6 +71,10 @@ msdiv
>  Target Var(bpf_has_sdiv) Init(-1)
>  Enable signed division and modulus instructions.
>  
> +msmov
> +Target 

Re: [PATCH] bpf: minor doc cleanup for command-line options

2023-07-27 Thread Jose E. Marchesi via Gcc-patches


Hi David, thanks for the patch.
OK.


> This patch makes some minor cleanups to eBPF options documented in
> invoke.texi:
>  - Delete some vestigial docs for removed -mkernel option
>  - Add -mbswap and -msdiv to the option summary
>  - Note the negative versions of several options
>  - Note that -mcpu=v4 also enables -msdiv.
>
> gcc/
>
>   * doc/invoke.texi (Option Summary): Remove -mkernel eBPF option.
>   Add -mbswap and -msdiv eBPF options.
>   (eBPF Options): Remove -mkernel.  Add -mno-{jmpext, jmp32,
>   alu32, v3-atomics, bswap, sdiv}.  Document that -mcpu=v4 also
>   enables -msdiv.
> ---
>  gcc/doc/invoke.texi | 48 ++---
>  1 file changed, 23 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e0fd7bd5b72..91113dd5821 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -945,9 +945,10 @@ Objective-C and Objective-C++ Dialects}.
>  -mmemory-latency=@var{time}}
>  
>  @emph{eBPF Options}
> -@gccoptlist{-mbig-endian -mlittle-endian -mkernel=@var{version}
> +@gccoptlist{-mbig-endian -mlittle-endian
>  -mframe-limit=@var{bytes} -mxbpf -mco-re -mno-co-re -mjmpext
> --mjmp32 -malu32 -mv3-atomics -mcpu=@var{version} -masm=@var{dialect}}
> +-mjmp32 -malu32 -mv3-atomics -mbswap -msdiv -mcpu=@var{version}
> +-masm=@var{dialect}}
>  
>  @emph{FR30 Options}
>  @gccoptlist{-msmall-model  -mno-lsim}
> @@ -24674,18 +24675,6 @@ the value that can be specified should be less than 
> or equal to
>  @samp{32767}.  Defaults to whatever limit is imposed by the version of
>  the Linux kernel targeted.
>  
> -@opindex mkernel
> -@item -mkernel=@var{version}
> -This specifies the minimum version of the kernel that will run the
> -compiled program.  GCC uses this version to determine which
> -instructions to use, what kernel helpers to allow, etc.  Currently,
> -@var{version} can be one of @samp{4.0}, @samp{4.1}, @samp{4.2},
> -@samp{4.3}, @samp{4.4}, @samp{4.5}, @samp{4.6}, @samp{4.7},
> -@samp{4.8}, @samp{4.9}, @samp{4.10}, @samp{4.11}, @samp{4.12},
> -@samp{4.13}, @samp{4.14}, @samp{4.15}, @samp{4.16}, @samp{4.17},
> -@samp{4.18}, @samp{4.19}, @samp{4.20}, @samp{5.0}, @samp{5.1},
> -@samp{5.2}, @samp{latest} and @samp{native}.
> -
>  @opindex mbig-endian
>  @item -mbig-endian
>  Generate code for a big-endian target.
> @@ -24696,30 +24685,38 @@ Generate code for a little-endian target.  This is 
> the default.
>  
>  @opindex mjmpext
>  @item -mjmpext
> -Enable generation of extra conditional-branch instructions.
> +@itemx -mno-jmpext
> +Enable or disable generation of extra conditional-branch instructions.
>  Enabled for CPU v2 and above.
>  
>  @opindex mjmp32
>  @item -mjmp32
> -Enable 32-bit jump instructions. Enabled for CPU v3 and above.
> +@itemx -mno-jmp32
> +Enable or disable generation of 32-bit jump instructions.
> +Enabled for CPU v3 and above.
>  
>  @opindex malu32
>  @item -malu32
> -Enable 32-bit ALU instructions. Enabled for CPU v3 and above.
> +@itemx -mno-alu32
> +Enable or disable generation of 32-bit ALU instructions.
> +Enabled for CPU v3 and above.
> +
> +@opindex mv3-atomics
> +@item -mv3-atomics
> +@itemx -mno-v3-atomics
> +Enable or disable instructions for general atomic operations introduced
> +in CPU v3.  Enabled for CPU v3 and above.
>  
>  @opindex mbswap
>  @item -mbswap
> -Enable byte swap instructions.  Enabled for CPU v4 and above.
> +@itemx -mno-bswap
> +Enable or disable byte swap instructions.  Enabled for CPU v4 and above.
>  
>  @opindex msdiv
>  @item -msdiv
> -Enable signed division and modulus instructions.  Enabled for CPU v4
> -and above.
> -
> -@opindex mv3-atomics
> -@item -mv3-atomics
> -Enable instructions for general atomic operations introduced in CPU v3.
> -Enabled for CPU v3 and above.
> +@itemx -mno-sdiv
> +Enable or disable signed division and modulus instructions.  Enabled for
> +CPU v4 and above.
>  
>  @opindex mcpu
>  @item -mcpu=@var{version}
> @@ -24747,6 +24744,7 @@ All features of v2, plus:
>  All features of v3, plus:
>  @itemize @minus
>  @item Byte swap instructions, as in @option{-mbswap}
> +@item Signed division and modulus instructions, as in @option{-msdiv}
>  @end itemize
>  @end table


Re: [PATCH] bpf: correct pseudo-C template for add3 and sub3

2023-07-27 Thread Jose E. Marchesi via Gcc-patches


> The pseudo-C output templates for these instructions were incorrectly
> using operand 1 rather than operand 2 on the RHS, which led to some
> very incorrect assembly generation with -masm=pseudoc.
>
> Tested on bpf-unknown-none.
> OK?

OK.  Thanks for spotting and fixing this!

>
> gcc/
>
>   * config/bpf/bpf.md (add3): Use %w2 instead of %w1
>   in pseudo-C dialect output template.
>   (sub3): Likewise.
>
> gcc/testsuite/
>
>   * gcc.target/bpf/alu-2.c: New test.
>   * gcc.target/bpf/alu-pseudoc-2.c: Likewise.
> ---
>  gcc/config/bpf/bpf.md|  4 ++--
>  gcc/testsuite/gcc.target/bpf/alu-2.c | 12 
>  gcc/testsuite/gcc.target/bpf/alu-pseudoc-2.c | 13 +
>  3 files changed, 27 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/alu-2.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/alu-pseudoc-2.c
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index 2ffc4ebd17e..66436397bb7 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -131,7 +131,7 @@ (define_insn "add3"
>  (plus:AM (match_operand:AM 1 "register_operand"   " 0,0")
>   (match_operand:AM 2 "reg_or_imm_operand" " r,I")))]
>"1"
> -  "{add\t%0,%2|%w0 += %w1}"
> +  "{add\t%0,%2|%w0 += %w2}"
>[(set_attr "type" "")])
>  
>  ;;; Subtraction
> @@ -144,7 +144,7 @@ (define_insn "sub3"
>  (minus:AM (match_operand:AM 1 "register_operand" " 0")
>(match_operand:AM 2 "register_operand" " r")))]
>""
> -  "{sub\t%0,%2|%w0 -= %w1}"
> +  "{sub\t%0,%2|%w0 -= %w2}"
>[(set_attr "type" "")])
>  
>  ;;; Negation
> diff --git a/gcc/testsuite/gcc.target/bpf/alu-2.c 
> b/gcc/testsuite/gcc.target/bpf/alu-2.c
> new file mode 100644
> index 000..0444a9bc68a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/alu-2.c
> @@ -0,0 +1,12 @@
> +/* Check add and sub instructions.  */
> +/* { dg-do compile } */
> +/* { dg-options "" } */
> +
> +long foo (long x, long y)
> +{
> +  return y - x + 4;
> +}
> +
> +/* { dg-final { scan-assembler-not {sub\t(%r.),\1\n} } } */
> +/* { dg-final { scan-assembler {sub\t(\%r.),(\%r.)\n} } } */
> +/* { dg-final { scan-assembler {add\t(\%r.),4\n} } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/alu-pseudoc-2.c 
> b/gcc/testsuite/gcc.target/bpf/alu-pseudoc-2.c
> new file mode 100644
> index 000..751db2477c0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/alu-pseudoc-2.c
> @@ -0,0 +1,13 @@
> +/* Check add and sub instructions (pseudoc asm dialect).  */
> +/* { dg-do compile } */
> +/* { dg-options "-masm=pseudoc" } */
> +
> +long foo (long x, long y)
> +{
> +  return y - x + 4;
> +}
> +
> +/* { dg-final { scan-assembler-not {\t(r.) -= \1\n} } } */
> +/* { dg-final { scan-assembler {\t(r.) -= (r.)\n} } } */
> +/* { dg-final { scan-assembler {\t(r.) \+= 4\n} } } */
> +


Re: [PATCH v2 2/2] bpf: add v3 atomic instructions

2023-07-26 Thread Jose E. Marchesi via Gcc-patches


OK.
Thanks!

> [Changes from v1: fix merge issue in invoke.texi]
>
> This patch adds support for the general atomic operations introduced in
> eBPF v3. In addition to the existing atomic add instruction, this adds:
>  - Atomic and, or, xor
>  - Fetching versions of these operations (including add)
>  - Atomic exchange
>  - Atomic compare-and-exchange
>
> To control emission of these instructions, a new target option
> -m[no-]v3-atomics is added. This option is enabled by -mcpu=v3
> and above.
>
> Support for these instructions was recently added in binutils.
>
> gcc/
>
>   * config/bpf/bpf.opt (mv3-atomics): New option.
>   * config/bpf/bpf.cc (bpf_option_override): Handle it here.
>   * config/bpf/bpf.h (enum_reg_class): Add R0 class.
>   (REG_CLASS_NAMES): Likewise.
>   (REG_CLASS_CONTENTS): Likewise.
>   (REGNO_REG_CLASS): Handle R0.
>   * config/bpf/bpf.md (UNSPEC_XADD): Rename to UNSPEC_AADD.
>   (UNSPEC_AAND): New unspec.
>   (UNSPEC_AOR): Likewise.
>   (UNSPEC_AXOR): Likewise.
>   (UNSPEC_AFADD): Likewise.
>   (UNSPEC_AFAND): Likewise.
>   (UNSPEC_AFOR): Likewise.
>   (UNSPEC_AFXOR): Likewise.
>   (UNSPEC_AXCHG): Likewise.
>   (UNSPEC_ACMPX): Likewise.
>   (atomic_add): Use UNSPEC_AADD and atomic type attribute.
>   Move to...
>   * config/bpf/atomic.md: ...Here. New file.
>   * config/bpf/constraints.md (t): New constraint for R0.
>   * doc/invoke.texi (eBPF Options): Document -mv3-atomics.
>
> gcc/testsuite/
>
>   * gcc.target/bpf/atomic-cmpxchg-1.c: New test.
>   * gcc.target/bpf/atomic-cmpxchg-2.c: New test.
>   * gcc.target/bpf/atomic-fetch-op-1.c: New test.
>   * gcc.target/bpf/atomic-fetch-op-2.c: New test.
>   * gcc.target/bpf/atomic-fetch-op-3.c: New test.
>   * gcc.target/bpf/atomic-op-1.c: New test.
>   * gcc.target/bpf/atomic-op-2.c: New test.
>   * gcc.target/bpf/atomic-op-3.c: New test.
>   * gcc.target/bpf/atomic-xchg-1.c: New test.
>   * gcc.target/bpf/atomic-xchg-2.c: New test.
> ---
>  gcc/config/bpf/atomic.md  | 185 ++
>  gcc/config/bpf/bpf.cc |   3 +
>  gcc/config/bpf/bpf.h  |   6 +-
>  gcc/config/bpf/bpf.md |  29 ++-
>  gcc/config/bpf/bpf.opt|   4 +
>  gcc/config/bpf/constraints.md |   3 +
>  gcc/doc/invoke.texi   |   8 +-
>  .../gcc.target/bpf/atomic-cmpxchg-1.c |  19 ++
>  .../gcc.target/bpf/atomic-cmpxchg-2.c |  19 ++
>  .../gcc.target/bpf/atomic-fetch-op-1.c|  50 +
>  .../gcc.target/bpf/atomic-fetch-op-2.c|  50 +
>  .../gcc.target/bpf/atomic-fetch-op-3.c|  49 +
>  gcc/testsuite/gcc.target/bpf/atomic-op-1.c|  49 +
>  gcc/testsuite/gcc.target/bpf/atomic-op-2.c|  49 +
>  gcc/testsuite/gcc.target/bpf/atomic-op-3.c|  49 +
>  gcc/testsuite/gcc.target/bpf/atomic-xchg-1.c  |  20 ++
>  gcc/testsuite/gcc.target/bpf/atomic-xchg-2.c  |  20 ++
>  17 files changed, 593 insertions(+), 19 deletions(-)
>  create mode 100644 gcc/config/bpf/atomic.md
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-cmpxchg-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-cmpxchg-2.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-fetch-op-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-fetch-op-2.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-fetch-op-3.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-op-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-op-2.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-op-3.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-xchg-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/atomic-xchg-2.c
>
> diff --git a/gcc/config/bpf/atomic.md b/gcc/config/bpf/atomic.md
> new file mode 100644
> index 000..caf8cc15cd4
> --- /dev/null
> +++ b/gcc/config/bpf/atomic.md
> @@ -0,0 +1,185 @@
> +;; Machine description for eBPF.
> +;; Copyright (C) 2023 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;; GNU General Public License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +
> +(define_mode_iterator AMO [SI DI])
> +
> +;;; Plain atomic modify operations.
> +
> +;; Non-fetching atomic add predates all 

[COMMITTED] bpf: fix generation of neg and neg32 BPF instructions

2023-07-26 Thread Jose E. Marchesi via Gcc-patches
This patch fixes GCC to generate correct neg and neg32 instructions,
which do not take a source register operand.  A couple of new tests
are added.

Tested in bpf-unknown-none.

gcc/ChangeLog

2023-07-26  Jose E. Marchesi  

* config/bpf/bpf.md: Fix neg{SI,DI}2 insn.

gcc/testsuite/ChangeLog

2023-07-26  Jose E. Marchesi  

* gcc.target/bpf/neg-1.c: New test.
* gcc.target/bpf/neg-pseudoc-1.c: Likewise.
---
 gcc/config/bpf/bpf.md|  4 ++--
 gcc/testsuite/gcc.target/bpf/neg-1.c | 14 ++
 gcc/testsuite/gcc.target/bpf/neg-pseudoc-1.c | 14 ++
 3 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/neg-1.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/neg-pseudoc-1.c

diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
index 579a8213b09..1b5e1900d4f 100644
--- a/gcc/config/bpf/bpf.md
+++ b/gcc/config/bpf/bpf.md
@@ -142,9 +142,9 @@ (define_insn "sub3"
 ;;; Negation
 (define_insn "neg2"
   [(set (match_operand:AM 0 "register_operand"   "=r,r")
-(neg:AM (match_operand:AM 1 "reg_or_imm_operand" " r,I")))]
+(neg:AM (match_operand:AM 1 "reg_or_imm_operand" " 0,I")))]
   ""
-  "{neg\t%0,%1|%w0 = -%w1}"
+  "{neg\t%0|%w0 = -%w1}"
   [(set_attr "type" "")])
 
 ;;; Multiplication
diff --git a/gcc/testsuite/gcc.target/bpf/neg-1.c 
b/gcc/testsuite/gcc.target/bpf/neg-1.c
new file mode 100644
index 000..9ffb956859d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/neg-1.c
@@ -0,0 +1,14 @@
+/* Check negr and negr32 instructions.  */
+
+/* { dg-do compile } */
+/* { dg-options "-malu32" } */
+
+long foo (long a, long b, int x, int y)
+{
+  a = -b;
+  x = -y;
+  return a + x;
+}
+
+/* { dg-final { scan-assembler "neg\t%r.\n" } } */
+/* { dg-final { scan-assembler "neg32\t%r.\n" } } */
diff --git a/gcc/testsuite/gcc.target/bpf/neg-pseudoc-1.c 
b/gcc/testsuite/gcc.target/bpf/neg-pseudoc-1.c
new file mode 100644
index 000..a4fb687f04a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/neg-pseudoc-1.c
@@ -0,0 +1,14 @@
+/* Check negr and negr32 instructions (pseudoc asm dialect.)  */
+
+/* { dg-do compile } */
+/* { dg-options "-malu32 -masm=pseudoc" } */
+
+long foo (long a, long b, int x, int y)
+{
+  a = -b;
+  x = -y;
+  return a + x;
+}
+
+/* { dg-final { scan-assembler {\t(r.) = -\1\n} } } */
+/* { dg-final { scan-assembler {\t(w.) = -\1\n} } } */
-- 
2.30.2



Re: [PATCH 2/2] bpf: add v3 atomic instructions

2023-07-25 Thread Jose E. Marchesi via Gcc-patches


Hi David.

> +<<< HEAD

There is a merge problem there.

>  @opindex mbswap
>  @item -mbswap
>  Enable byte swap instructions.  Enabled for CPU v4 and above.
> @@ -24715,6 +24716,12 @@ Enable byte swap instructions.  Enabled for CPU v4 
> and above.
>  @item -msdiv
>  Enable signed division and modulus instructions.  Enabled for CPU v4
>  and above.
> +===
> +@opindex mv3-atomics
> +@item -mv3-atomics
> +Enable instructions for general atomic operations introduced in CPU v3.
> +Enabled for CPU v3 and above.
> +>>> 6de76bd11b6 (bpf: add v3 atomic instructions)


Re: [PATCH 1/2] bpf: don't print () in bpf_print_operand_address

2023-07-25 Thread Jose E. Marchesi via Gcc-patches


Hi David.

> Unfortunately, the pseudo-C dialect syntax used for some of the v3
> atomic instructions clashes with unconditionally printing the
> surrounding parentheses in bpf_print_operand_address.
>
> Instead, place the parentheses in the output templates where needed.
>
> Tested in bpf-unknown-none.
> OK?
>
> gcc/
>
>   * config/bpf/bpf.cc (bpf_print_operand_address): Don't print
>   enclosing parentheses for pseudo-C dialect.
>   * config/bpf/bpf.md (zero_extendhidi2): Add parentheses around
>   operands of pseudo-C dialect output templates where needed.
>   (zero_extendqidi2): Likewise.
>   (zero_extendsidi2): Likewise.
>   (*mov): Likewise.
> ---
>  gcc/config/bpf/bpf.cc |  8 
>  gcc/config/bpf/bpf.md | 12 ++--
>  2 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index 55b6927a62f..2c077ea834e 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -933,9 +933,9 @@ bpf_print_operand_address (FILE *file, rtx addr)
>switch (GET_CODE (addr))
>  {
>  case REG:
> -  fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "");

We can save the call to fprintf there with a conditional.

>bpf_print_register (file, addr, 0);
> -  fprintf (file, asm_dialect == ASM_NORMAL ? "+0]" : "+0)");
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "+0]" : "+0");
>break;
>  case PLUS:
>{
> @@ -944,11 +944,11 @@ bpf_print_operand_address (FILE *file, rtx addr)
>  
>   if (GET_CODE (op0) == REG && GET_CODE (op1) == CONST_INT)
> {
> - fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
> + fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "");

Likewise.

>   bpf_print_register (file, op0, 0);
>   fprintf (file, "+");
>   output_addr_const (file, op1);
> - fprintf (file, asm_dialect == ASM_NORMAL ? "]" : ")");
> + fprintf (file, asm_dialect == ASM_NORMAL ? "]" : "");
> }
>   else
> fatal_insn ("invalid address in operand", addr);
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index 64342ea1de2..579a8213b09 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -260,7 +260,7 @@ (define_insn "zero_extendhidi2"
>"@
> {and\t%0,0x|%0 &= 0x}
> {mov\t%0,%1\;and\t%0,0x|%0 = %1;%0 &= 0x}
> -   {ldxh\t%0,%1|%0 = *(u16 *) %1}"
> +   {ldxh\t%0,%1|%0 = *(u16 *) (%1)}"
>[(set_attr "type" "alu,alu,ldx")])
>  
>  (define_insn "zero_extendqidi2"
> @@ -270,7 +270,7 @@ (define_insn "zero_extendqidi2"
>"@
> {and\t%0,0xff|%0 &= 0xff}
> {mov\t%0,%1\;and\t%0,0xff|%0 = %1;%0 &= 0xff}
> -   {ldxh\t%0,%1|%0 = *(u8 *) %1}"
> +   {ldxh\t%0,%1|%0 = *(u8 *) (%1)}"
>[(set_attr "type" "alu,alu,ldx")])
>  
>  (define_insn "zero_extendsidi2"
> @@ -280,7 +280,7 @@ (define_insn "zero_extendsidi2"
>""
>"@
> * return bpf_has_alu32 ? \"{mov32\t%0,%1|%0 = %1}\" : 
> \"{mov\t%0,%1\;and\t%0,0x|%0 = %1;%0 &= 0x}\";
> -   {ldxw\t%0,%1|%0 = *(u32 *) %1}"
> +   {ldxw\t%0,%1|%0 = *(u32 *) (%1)}"
>[(set_attr "type" "alu,ldx")])
>  
>  ;;; Sign-extension
> @@ -319,11 +319,11 @@ (define_insn "*mov"
>  (match_operand:MM 1 "mov_src_operand"  " q,rI,B,r,I"))]
>""
>"@
> -   {ldx\t%0,%1|%0 = *( *) %1}
> +   {ldx\t%0,%1|%0 = *( *) (%1)}
> {mov\t%0,%1|%0 = %1}
> {lddw\t%0,%1|%0 = %1 ll}
> -   {stx\t%0,%1|*( *) %0 = %1}
> -   {st\t%0,%1|*( *) %0 = %1}"
> +   {stx\t%0,%1|*( *) (%0) = %1}
> +   {st\t%0,%1|*( *) (%0) = %1}"
>  [(set_attr "type" "ldx,alu,alu,stx,st")])
>  
>   Shifts

Otherwise, LGTM.
OK.

Thanks!


Re: [COMMITTED] bpf: add pseudo-c asm dialect for "nop"

2023-07-25 Thread Jose E. Marchesi via Gcc-patches


> The define_insn "nop" was missing a template for the pseudo-c dialect,
> so the normal syntax was unconditionally emitted.

Thank you.

> Tested on bpf-unknown-none, committed as obvious.
>
> gcc/
>
>   * config/bpf/bpf.md (nop): Add pseudo-c asm dialect template.
> ---
>  gcc/config/bpf/bpf.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index 3e2d760fbe4..64342ea1de2 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -103,7 +103,7 @@ (define_mode_attr msuffix [(SI "32") (DI "")])
>  (define_insn "nop"
>[(const_int 0)]
>""
> -  "ja\t0"
> +  "{ja\t0|goto 0}"
>[(set_attr "type" "alu")])
>  
>   Arithmetic/Logical


[COMMITTED] bpf: sdiv/smod are now part of BPF V4

2023-07-24 Thread Jose E. Marchesi via Gcc-patches
We used to support signed division and signed modulus instructions in
the XBPF GCC-specific extensions to BPF.  However, BPF caught up by
adding these instructions in the V4 of the ISA.

This patch changes GCC in order to use sdiv/smod instructions when
-mcpu=v4 or higher.  The testsuite and the manual have been updated
accordingly.

Tested in bpf-unknown-none.

gcc/ChangeLog

PR target/110783
* config/bpf/bpf.opt: New command-line option -msdiv.
* config/bpf/bpf.md: Conditionalize sdiv/smod on bpf_has_sdiv.
* config/bpf/bpf.cc (bpf_option_override): Initialize
bpf_has_sdiv.
* doc/invoke.texi (eBPF Options): Document -msdiv.

gcc/testsuite/ChangeLog

PR target/110783
* gcc.target/bpf/xbpf-sdiv-1.c: Renamed to sdiv-1.c
* gcc.target/bpf/xbpf-smod-1.c: Renamed to smod-1.c
* gcc.target/bpf/sdiv-1.c: Renamed from xbpf-sdiv-1.c, use -mcpu=v4.
* gcc.target/bpf/smod-1.c: Renamed from xbpf-smod-1.c, use -mcpu=v4.
* gcc.target/bpf/diag-sdiv.c: Use -mcpu=v3.
* gcc.target/bpf/diag-smod.c: Likewise.
---
 gcc/config/bpf/bpf.cc|  3 +++
 gcc/config/bpf/bpf.md| 16 
 gcc/config/bpf/bpf.opt   |  4 
 gcc/doc/invoke.texi  |  5 +
 gcc/testsuite/gcc.target/bpf/diag-sdiv.c |  2 +-
 gcc/testsuite/gcc.target/bpf/diag-smod.c |  2 +-
 .../gcc.target/bpf/{xbpf-sdiv-1.c => sdiv-1.c}   |  2 +-
 .../gcc.target/bpf/{xbpf-smod-1.c => smod-1.c}   |  2 +-
 8 files changed, 24 insertions(+), 12 deletions(-)
 rename gcc/testsuite/gcc.target/bpf/{xbpf-sdiv-1.c => sdiv-1.c} (86%)
 rename gcc/testsuite/gcc.target/bpf/{xbpf-smod-1.c => smod-1.c} (86%)

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 18d3b5f14d6..55b6927a62f 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -256,6 +256,9 @@ bpf_option_override (void)
   if (bpf_has_bswap == -1)
 bpf_has_bswap = (bpf_isa >= ISA_V4);
 
+  if (bpf_has_sdiv == -1)
+bpf_has_sdiv = (bpf_isa >= ISA_V4);
+
   /* Disable -fstack-protector as it is not supported in BPF.  */
   if (flag_stack_protect)
 {
diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
index 81e2268c400..3e2d760fbe4 100644
--- a/gcc/config/bpf/bpf.md
+++ b/gcc/config/bpf/bpf.md
@@ -167,8 +167,8 @@ (define_insn "*mulsidi3_zeroextend"
 
 ;;; Division
 
-;; Note that eBPF doesn't provide instructions for signed integer
-;; division.
+;; Note that eBPF <= V3 doesn't provide instructions for signed
+;; integer division.
 
 (define_insn "udiv3"
   [(set (match_operand:AM 0 "register_operand" "=r,r")
@@ -178,20 +178,20 @@ (define_insn "udiv3"
   "{div\t%0,%2|%w0 /= %w2}"
   [(set_attr "type" "")])
 
-;; However, xBPF does provide a signed division operator, sdiv.
+;; However, BPF V4 does provide a signed division operator, sdiv.
 
 (define_insn "div3"
   [(set (match_operand:AM 0 "register_operand" "=r,r")
 (div:AM (match_operand:AM 1 "register_operand" " 0,0")
 (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
-  "TARGET_XBPF"
+  "bpf_has_sdiv"
   "{sdiv\t%0,%2|%w0 s/= %w2}"
   [(set_attr "type" "")])
 
 ;;; Modulus
 
-;; Note that eBPF doesn't provide instructions for signed integer
-;; remainder.
+;; Note that eBPF <= V3 doesn't provide instructions for signed
+;; integer remainder.
 
 (define_insn "umod3"
   [(set (match_operand:AM 0 "register_operand" "=r,r")
@@ -201,13 +201,13 @@ (define_insn "umod3"
   "{mod\t%0,%2|%w0 %%= %w2}"
   [(set_attr "type" "")])
 
-;; Again, xBPF provides a signed version, smod.
+;; However, BPF V4 does provide a signed modulus operator, smod.
 
 (define_insn "mod3"
   [(set (match_operand:AM 0 "register_operand" "=r,r")
 (mod:AM (match_operand:AM 1 "register_operand" " 0,0")
 (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
-  "TARGET_XBPF"
+  "bpf_has_sdiv"
   "{smod\t%0,%2|%w0 s%%= %w2}"
   [(set_attr "type" "")])
 
diff --git a/gcc/config/bpf/bpf.opt b/gcc/config/bpf/bpf.opt
index 3bf9033279b..bd35f8dbd0c 100644
--- a/gcc/config/bpf/bpf.opt
+++ b/gcc/config/bpf/bpf.opt
@@ -63,6 +63,10 @@ mbswap
 Target Var(bpf_has_bswap) Init(-1)
 Enable byte swap instructions.
 
+msdiv
+Target Var(bpf_has_sdiv) Init(-1)
+Enable signed division and modulus instructions.
+
 mcpu=
 Target RejectNegative Joined Var(bpf_isa) Enum(bpf_isa) Init(ISA_V4)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a977a34db42..fa765d5a0dd 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24711,6 +24711,11 @@ Enable 32-bit ALU instructions. Enabled for CPU v3 and 
above.
 @item -mbswap
 Enable byte swap instructions.  Enabled for CPU v4 and above.
 
+@opindex msdiv
+@item -msdiv
+Enable signed division and modulus instructions.  Enabled for CPU v4
+and above.
+
 @opindex mcpu
 @item -mcpu=@var{version}
 This specifies which version of the eBPF ISA to target. Newer 

[COMMITTED] bpf: remove -mkernel option and BPF_KERNEL_VERSION_CODE

2023-07-24 Thread Jose E. Marchesi via Gcc-patches
Having the ability to specify a target kernel version when building
a BPF program is one of those things that sound pretty good in theory,
but simply don't work in practice: kernels in practice contain
backports, etc.  Also, the addition of CO-RE to BPF has made this
unnecessary.

This patch removes the -mkernel command line option and also the
associated BPF_KERNEL_VERSION_CODE pre-processor constant.

Tested in bpf-unknown-none.

gcc/ChangeLog

* config/bpf/bpf-opts.h (enum bpf_kernel_version): Remove enum.
* config/bpf/bpf.opt (mkernel): Remove option.
* config/bpf/bpf.cc (bpf_target_macros): Do not define
BPF_KERNEL_VERSION_CODE.
---
 gcc/config/bpf/bpf-opts.h | 33 ---
 gcc/config/bpf/bpf.cc | 40 --
 gcc/config/bpf/bpf.opt| 87 ---
 3 files changed, 160 deletions(-)

diff --git a/gcc/config/bpf/bpf-opts.h b/gcc/config/bpf/bpf-opts.h
index e0be591b479..5c9441ccb28 100644
--- a/gcc/config/bpf/bpf-opts.h
+++ b/gcc/config/bpf/bpf-opts.h
@@ -20,39 +20,6 @@
 #ifndef BPF_OPTS_H
 #define BPF_OPTS_H
 
-/* Supported versions of the Linux kernel.  */
-enum bpf_kernel_version
-{
-  /* Linux 4.x */
-  LINUX_V4_0,
-  LINUX_V4_1,
-  LINUX_V4_2,
-  LINUX_V4_3,
-  LINUX_V4_4,
-  LINUX_V4_5,
-  LINUX_V4_6,
-  LINUX_V4_7,
-  LINUX_V4_8,
-  LINUX_V4_9,
-  LINUX_V4_10,
-  LINUX_V4_11,
-  LINUX_V4_12,
-  LINUX_V4_13,
-  LINUX_V4_14,
-  LINUX_V4_15,
-  LINUX_V4_16,
-  LINUX_V4_17,
-  LINUX_V4_18,
-  LINUX_V4_19,
-  LINUX_V4_20,
-  /* Linux 5.x  */
-  LINUX_V5_0,
-  LINUX_V5_1,
-  LINUX_V5_2,
-  LINUX_LATEST = LINUX_V5_2,
-  LINUX_NATIVE,
-};
-
 enum bpf_isa_version
 {
   ISA_V1,
diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 6bc715429dc..18d3b5f14d6 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -308,46 +308,6 @@ bpf_target_macros (cpp_reader *pfile)
 builtin_define ("__BPF_BIG_ENDIAN__");
   else
 builtin_define ("__BPF_LITTLE_ENDIAN__");
-
-  /* Define BPF_KERNEL_VERSION_CODE */
-  {
-const char *version_code;
-char *kernel_version_code;
-
-switch (bpf_kernel)
-  {
-  case LINUX_V4_0: version_code = "0x4"; break;
-  case LINUX_V4_1: version_code = "0x40100"; break;
-  case LINUX_V4_2: version_code = "0x40200"; break;
-  case LINUX_V4_3: version_code = "0x40300"; break;
-  case LINUX_V4_4: version_code = "0x40400"; break;
-  case LINUX_V4_5: version_code = "0x40500"; break;
-  case LINUX_V4_6: version_code = "0x40600"; break;
-  case LINUX_V4_7: version_code = "0x40700"; break;
-  case LINUX_V4_8: version_code = "0x40800"; break;
-  case LINUX_V4_9: version_code = "0x40900"; break;
-  case LINUX_V4_10: version_code = "0x40a00"; break;
-  case LINUX_V4_11: version_code = "0x40b00"; break;
-  case LINUX_V4_12: version_code = "0x40c00"; break;
-  case LINUX_V4_13: version_code = "0x40d00"; break;
-  case LINUX_V4_14: version_code = "0x40e00"; break;
-  case LINUX_V4_15: version_code = "0x40f00"; break;
-  case LINUX_V4_16: version_code = "0x41000"; break;
-  case LINUX_V4_17: version_code = "0x42000"; break;
-  case LINUX_V4_18: version_code = "0x43000"; break;
-  case LINUX_V4_19: version_code = "0x44000"; break;
-  case LINUX_V4_20: version_code = "0x45000"; break;
-  case LINUX_V5_0: version_code = "0x5"; break;
-  case LINUX_V5_1: version_code = "0x50100"; break;
-  case LINUX_V5_2: version_code = "0x50200"; break;
-  default:
-   gcc_unreachable ();
-  }
-
-kernel_version_code = ACONCAT (("__BPF_KERNEL_VERSION_CODE__=",
-   version_code, NULL));
-builtin_define (kernel_version_code);
-  }
 }
 
 /* Return an RTX representing the place where a function returns or
diff --git a/gcc/config/bpf/bpf.opt b/gcc/config/bpf/bpf.opt
index 1e4dcc871d7..3bf9033279b 100644
--- a/gcc/config/bpf/bpf.opt
+++ b/gcc/config/bpf/bpf.opt
@@ -21,93 +21,6 @@
 HeaderInclude
 config/bpf/bpf-opts.h
 
-; Selecting the kind of kernel the eBPF will be running on.
-
-mkernel=
-Target RejectNegative Joined Var(bpf_kernel) Enum(bpf_kernel) 
Init(LINUX_LATEST)
-Generate eBPF for the given Linux kernel version.
-
-Enum
-Name(bpf_kernel) Type(enum bpf_kernel_version)
-
-EnumValue
-Enum(bpf_kernel) String(native) Value(LINUX_NATIVE) DriverOnly
-
-EnumValue
-Enum(bpf_kernel) String(latest) Value(LINUX_LATEST) DriverOnly
-
-EnumValue
-Enum(bpf_kernel) String(4.0) Value(LINUX_V4_0)
-
-EnumValue
-Enum(bpf_kernel) String(4.1) Value(LINUX_V4_1)
-
-EnumValue
-Enum(bpf_kernel) String(4.2) Value(LINUX_V4_2)
-
-EnumValue
-Enum(bpf_kernel) String(4.3) Value(LINUX_V4_3)
-
-EnumValue
-Enum(bpf_kernel) String(4.4) Value(LINUX_V4_4)
-
-EnumValue
-Enum(bpf_kernel) String(4.5) Value(LINUX_V4_5)
-
-EnumValue
-Enum(bpf_kernel) String(4.6) Value(LINUX_V4_6)
-
-EnumValue
-Enum(bpf_kernel) String(4.7) Value(LINUX_V4_7)
-
-EnumValue
-Enum(bpf_kernel) String(4.8) 

[COMMITTED] bpf: make use of the bswap{16,32,64} V4 BPF instruction

2023-07-24 Thread Jose E. Marchesi via Gcc-patches
This patch makes the BPF backend use the new V4 bswap{16,32,64}
instructions in order to implement the __builtin_bswap{16,32,64}
built-ins.  It also adds support for -mcpu=v4 and -m[no-]bswap
command-line options.  Tests and doc updates are included.

Tested in bpf-unknown-none.

gcc/ChangeLog

PR target/110786
* config/bpf/bpf.opt (mcpu): Add ISA_V4 and make it the default.
(mbswap): New option.
* config/bpf/bpf-opts.h (enum bpf_isa_version): New value ISA_V4.
* config/bpf/bpf.cc (bpf_option_override): Set bpf_has_bswap.
* config/bpf/bpf.md: Use bswap instructions if available for
bswap* insn, and fix constraint.
* doc/invoke.texi (eBPF Options): Document -mcpu=v4 and -mbswap.

gcc/testsuite/ChangeLog

PR target/110786
* gcc.target/bpf/bswap-1.c: Pass -mcpu=v3 to build test.
* gcc.target/bpf/bswap-2.c: New test.
---
 gcc/config/bpf/bpf-opts.h  |  1 +
 gcc/config/bpf/bpf.cc  |  3 +++
 gcc/config/bpf/bpf.md  | 17 +++--
 gcc/config/bpf/bpf.opt |  9 -
 gcc/doc/invoke.texi| 11 ++-
 gcc/testsuite/gcc.target/bpf/bswap-1.c |  2 +-
 gcc/testsuite/gcc.target/bpf/bswap-2.c | 23 +++
 7 files changed, 57 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/bswap-2.c

diff --git a/gcc/config/bpf/bpf-opts.h b/gcc/config/bpf/bpf-opts.h
index 92db01ec4d5..e0be591b479 100644
--- a/gcc/config/bpf/bpf-opts.h
+++ b/gcc/config/bpf/bpf-opts.h
@@ -58,6 +58,7 @@ enum bpf_isa_version
   ISA_V1,
   ISA_V2,
   ISA_V3,
+  ISA_V4
 };
 
 enum bpf_asm_dialect
diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 1d3936871d6..6bc715429dc 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -253,6 +253,9 @@ bpf_option_override (void)
   if (bpf_has_jmp32 == -1)
 bpf_has_jmp32 = (bpf_isa >= ISA_V3);
 
+  if (bpf_has_bswap == -1)
+bpf_has_bswap = (bpf_isa >= ISA_V4);
+
   /* Disable -fstack-protector as it is not supported in BPF.  */
   if (flag_stack_protect)
 {
diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
index 80220f2ef37..81e2268c400 100644
--- a/gcc/config/bpf/bpf.md
+++ b/gcc/config/bpf/bpf.md
@@ -60,7 +60,7 @@ (define_constants
 ;; Instruction classes.
 ;; alu 64-bit arithmetic.
 ;; alu32   32-bit arithmetic.
-;; end endianness conversion instructions.
+;; end endianness conversion or byte swap instructions.
 ;; ld  load instructions.
 ;; lddxload 64-bit immediate instruction.
 ;; ldx generic load instructions.
@@ -354,20 +354,25 @@ (define_insn "lshr3"
   "{rsh\t%0,%2|%w0 >>= %w2}"
   [(set_attr "type" "")])
 
- Endianness conversion
+ Byte swapping
 
 (define_mode_iterator BSM [HI SI DI])
 (define_mode_attr endmode [(HI "16") (SI "32") (DI "64")])
 
 (define_insn "bswap2"
   [(set (match_operand:BSM 0 "register_operand""=r")
-(bswap:BSM (match_operand:BSM 1 "register_operand" " r")))]
+(bswap:BSM (match_operand:BSM 1 "register_operand" " 0")))]
   ""
 {
-  if (TARGET_BIG_ENDIAN)
-return "{endle\t%0, |%0 = le %0}";
+  if (bpf_has_bswap)
+return "{bswap\t%0, |%0 = bswap %1}";
   else
-return "{endbe\t%0, |%0 = be %0}";
+{
+  if (TARGET_BIG_ENDIAN)
+return "{endle\t%0, |%0 = le %1}";
+  else
+return "{endbe\t%0, |%0 = be %1}";
+}
 }
   [(set_attr "type" "end")])
 
diff --git a/gcc/config/bpf/bpf.opt b/gcc/config/bpf/bpf.opt
index ff805f9e083..1e4dcc871d7 100644
--- a/gcc/config/bpf/bpf.opt
+++ b/gcc/config/bpf/bpf.opt
@@ -146,8 +146,12 @@ mjmp32
 Target Var(bpf_has_jmp32) Init(-1)
 Enable 32-bit jump instructions.
 
+mbswap
+Target Var(bpf_has_bswap) Init(-1)
+Enable byte swap instructions.
+
 mcpu=
-Target RejectNegative Joined Var(bpf_isa) Enum(bpf_isa) Init(ISA_V3)
+Target RejectNegative Joined Var(bpf_isa) Enum(bpf_isa) Init(ISA_V4)
 
 Enum
 Name(bpf_isa) Type(enum bpf_isa_version)
@@ -161,6 +165,9 @@ Enum(bpf_isa) String(v2) Value(ISA_V2)
 EnumValue
 Enum(bpf_isa) String(v3) Value(ISA_V3)
 
+EnumValue
+Enum(bpf_isa) String(v4) Value(ISA_V4)
+
 masm=
 Target RejectNegative Joined Var(asm_dialect) Enum(asm_dialect) 
Init(ASM_NORMAL)
 Use given assembler dialect.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index efd356e7ef6..a977a34db42 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24707,10 +24707,14 @@ Enable 32-bit jump instructions. Enabled for CPU v3 
and above.
 @item -malu32
 Enable 32-bit ALU instructions. Enabled for CPU v3 and above.
 
+@opindex mbswap
+@item -mbswap
+Enable byte swap instructions.  Enabled for CPU v4 and above.
+
 @opindex mcpu
 @item -mcpu=@var{version}
 This specifies which version of the eBPF ISA to target. Newer versions
-may not be supported by all kernels. The default is @samp{v3}.
+may not be supported by all kernels. The default is @samp{v4}.
 

Re: [PATCH v4] bpf: fixed template for neg (added second operand)

2023-07-21 Thread Jose E. Marchesi via Gcc-patches


Better with the commit message.
OK.  Thanks.

> This patch fixes define_insn for "neg" to support 2 operands.
> Initial implementation assumed the format "neg %0" while the instruction
> allows both a destination and source operands. The second operand can
> either be a register or an immediate value.
>
> gcc/ChangeLog:
>
>   * config/bpf/bpf.md: fixed template for neg instruction.
> ---
>  gcc/config/bpf/bpf.md | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index 329f62f55c33..adf11e151df1 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -139,10 +139,10 @@
>  
>  ;;; Negation
>  (define_insn "neg2"
> -  [(set (match_operand:AM 0 "register_operand" "=r")
> -(neg:AM (match_operand:AM 1 "register_operand" " 0")))]
> +  [(set (match_operand:AM 0 "register_operand" "=r,r")
> +(neg:AM (match_operand:AM 1 "register_operand" " r,I")))]
>""
> -  "neg\t%0"
> +  "neg\t%0,%1"
>[(set_attr "type" "")])
>  
>  ;;; Multiplication


Re: [PATCH v3] bpf: fixed template for neg (added second operand)

2023-07-21 Thread Jose E. Marchesi via Gcc-patches


Hi Cuper.
OK.  Thanks!

> From 7756a4becd1934e55d6d14ac4a9fd6d408a4797b Mon Sep 17 00:00:00 2001
> From: Cupertino Miranda 
> Date: Fri, 21 Jul 2023 17:40:07 +0100
> Subject: [PATCH v3] bpf: fixed template for neg (added second operand)
>
> gcc/ChangeLog:
>
>   * config/bpf/bpf.md: fixed template for neg instruction.
> ---
>  gcc/config/bpf/bpf.md | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index 329f62f55c33..adf11e151df1 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -139,10 +139,10 @@
>  
>  ;;; Negation
>  (define_insn "neg2"
> -  [(set (match_operand:AM 0 "register_operand" "=r")
> -(neg:AM (match_operand:AM 1 "register_operand" " 0")))]
> +  [(set (match_operand:AM 0 "register_operand" "=r,r")
> +(neg:AM (match_operand:AM 1 "register_operand" " r,I")))]
>""
> -  "neg\t%0"
> +  "neg\t%0,%1"
>[(set_attr "type" "")])
>  
>  ;;; Multiplication


Re: [PATCH v3] bpf: pseudo-c assembly dialect support

2023-07-21 Thread Jose E. Marchesi via Gcc-patches


> Thanks for the suggestions/fixes in changelog.
> Inlined new patch.
>
> Cupertino
>
>>> gcc/ChangeLog:
>>>
>>> * config/bpf/bpf.opt: Added option -masm=.
>>> * config/bpf/bpf-opts.h: Likewize.
>>> * config/bpf/bpf.cc: Changed it to conform with new pseudoc
>>>   dialect support.
>>> * config/bpf/bpf.h: Likewise.
>>> * config/bpf/bpf.md: Added pseudo-c templates.
>>> * doc/invoke.texi: (-masm=DIALECT) New eBPF option item.
>>
>> I think the ChangeLog could be made more useful, and the syntax of the
>> last entry is not entirely right.  I suggest something like:
>>
>>  * config/bpf/bpf.opt: Added option -masm=.
>>  * config/bpf/bpf-opts.h (enum bpf_asm_dialect): New type.
>>  * config/bpf/bpf.cc (bpf_print_register): New function.
>>  (bpf_print_register): Support pseudo-c syntax for registers.
>>  (bpf_print_operand_address): Likewise.
>>  * config/bpf/bpf.h (ASM_SPEC): handle -msasm.
>>  (ASSEMBLER_DIALECT): Define.
>>  * config/bpf/bpf.md: Added pseudo-c templates.
>>  * doc/invoke.texi (-masm=DIALECT): New eBPF option item.
>>
>> Please make sure to run the contrib/gcc-changelog/git_check-commit.py
>> script.
>>
>
> From 6ebe3229a59b32ffb2ed24b3a2cf8c360a807c31 Mon Sep 17 00:00:00 2001
> From: Cupertino Miranda 
> Date: Mon, 17 Jul 2023 17:42:42 +0100
> Subject: [PATCH v3] bpf: pseudo-c assembly dialect support
>
> New pseudo-c BPF assembly dialect already supported by clang and widely
> used in the linux kernel.
>
> gcc/ChangeLog:
>
>   * config/bpf/bpf.opt: Added option -masm=.
>   * config/bpf/bpf-opts.h (enum bpf_asm_dialect): New type.
>   * config/bpf/bpf.cc (bpf_print_register): New function.
>   (bpf_print_register): Support pseudo-c syntax for registers.
>   (bpf_print_operand_address): Likewise.
>   * config/bpf/bpf.h (ASM_SPEC): handle -msasm.
>   (ASSEMBLER_DIALECT): Define.
>   * config/bpf/bpf.md: Added pseudo-c templates.
>   * doc/invoke.texi (-masm=): New eBPF option item.
> ---
>  gcc/config/bpf/bpf-opts.h |  6 +++
>  gcc/config/bpf/bpf.cc | 46 ---
>  gcc/config/bpf/bpf.h  |  5 +-
>  gcc/config/bpf/bpf.md | 97 ---
>  gcc/config/bpf/bpf.opt| 14 ++
>  gcc/doc/invoke.texi   | 21 -
>  6 files changed, 133 insertions(+), 56 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf-opts.h b/gcc/config/bpf/bpf-opts.h
> index 8282351cf045..92db01ec4d54 100644
> --- a/gcc/config/bpf/bpf-opts.h
> +++ b/gcc/config/bpf/bpf-opts.h
> @@ -60,4 +60,10 @@ enum bpf_isa_version
>ISA_V3,
>  };
>  
> +enum bpf_asm_dialect
> +{
> +  ASM_NORMAL,
> +  ASM_PSEUDOC
> +};
> +
>  #endif /* ! BPF_OPTS_H */
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index e0324e1e0e08..1d3936871d60 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -873,16 +873,47 @@ bpf_output_call (rtx target)
>return "";
>  }
>  
> +/* Print register name according to assembly dialect.
> +   In normal syntax registers are printed like %rN where N is the
> +   register number.
> +   In pseudoc syntax, the register names do not feature a '%' prefix.
> +   Additionally, the code 'w' denotes that the register should be printed
> +   as wN instead of rN, where N is the register number, but only when the
> +   value stored in the operand OP is 32-bit wide.  */
> +static void
> +bpf_print_register (FILE *file, rtx op, int code)
> +{
> +  if(asm_dialect == ASM_NORMAL)
> +fprintf (file, "%s", reg_names[REGNO (op)]);
> +  else
> +{
> +  if (code == 'w' && GET_MODE (op) == SImode)
> + {
> +   if (REGNO (op) == BPF_FP)
> + fprintf (file, "w10");
> +   else
> + fprintf (file, "w%s", reg_names[REGNO (op)]+2);
> + }
> +  else
> + {
> +   if (REGNO (op) == BPF_FP)
> + fprintf (file, "r10");
> +   else
> + fprintf (file, "%s", reg_names[REGNO (op)]+1);
> + }
> +}
> +}
> +
>  /* Print an instruction operand.  This function is called in the macro
> PRINT_OPERAND defined in bpf.h */
>  
>  void
> -bpf_print_operand (FILE *file, rtx op, int code ATTRIBUTE_UNUSED)
> +bpf_print_operand (FILE *file, rtx op, int code)
>  {
>switch (GET_CODE (op))
>  {
>  case REG:
> -  fprintf (file, "%s", reg_names[REGNO (op)]);
> +  bpf_print_register (file, op, code);
>break;
>  case MEM:
>output_address (GET_MODE (op), XEXP (op, 0));
> @@ -936,7 +967,9 @@ bpf_print_operand_address (FILE *file, rtx addr)
>switch (GET_CODE (addr))
>  {
>  case REG:
> -  fprintf (file, "[%s+0]", reg_names[REGNO (addr)]);
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
> +  bpf_print_register (file, addr, 0);
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "+0]" : "+0)");
>break;
>  case PLUS:
>{
> @@ -945,9 +978,11 @@ bpf_print_operand_address (FILE *file, rtx addr)
>  
>   if (GET_CODE 

Re: [PATCH v2] bpf: pseudo-c assembly dialect support

2023-07-21 Thread Jose E. Marchesi via Gcc-patches


> gcc/ChangeLog:
>
>   * config/bpf/bpf.opt: Added option -masm=.
>   * config/bpf/bpf-opts.h: Likewize.
>   * config/bpf/bpf.cc: Changed it to conform with new pseudoc
> dialect support.
>   * config/bpf/bpf.h: Likewise.
>   * config/bpf/bpf.md: Added pseudo-c templates.
>   * doc/invoke.texi: (-masm=DIALECT) New eBPF option item.

I think the ChangeLog could be made more useful, and the syntax of the
last entry is not entirely right.  I suggest something like:

* config/bpf/bpf.opt: Added option -masm=.
* config/bpf/bpf-opts.h (enum bpf_asm_dialect): New type.
* config/bpf/bpf.cc (bpf_print_register): New function.
(bpf_print_operand): Support pseudo-c syntax for registers.
(bpf_print_operand_address): Likewise.
* config/bpf/bpf.h (ASM_SPEC): Handle -masm=.
(ASSEMBLER_DIALECT): Define.
* config/bpf/bpf.md: Added pseudo-c templates.
* doc/invoke.texi (-masm=DIALECT): New eBPF option item.

Please make sure to run the contrib/gcc-changelog/git_check-commit.py
script.

> ---
>  gcc/config/bpf/bpf-opts.h |  6 +++
>  gcc/config/bpf/bpf.cc | 46 ---
>  gcc/config/bpf/bpf.h  |  5 +-
>  gcc/config/bpf/bpf.md | 97 ---
>  gcc/config/bpf/bpf.opt| 14 ++
>  gcc/doc/invoke.texi   | 21 -
>  6 files changed, 133 insertions(+), 56 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf-opts.h b/gcc/config/bpf/bpf-opts.h
> index 8282351cf045..92db01ec4d54 100644
> --- a/gcc/config/bpf/bpf-opts.h
> +++ b/gcc/config/bpf/bpf-opts.h
> @@ -60,4 +60,10 @@ enum bpf_isa_version
>ISA_V3,
>  };
>  
> +enum bpf_asm_dialect
> +{
> +  ASM_NORMAL,
> +  ASM_PSEUDOC
> +};
> +
>  #endif /* ! BPF_OPTS_H */
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index e0324e1e0e08..1d3936871d60 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -873,16 +873,47 @@ bpf_output_call (rtx target)
>return "";
>  }
>  
> +/* Print register name according to assembly dialect.
> +   In normal syntax registers are printed like %rN where N is the
> +   register number.
> +   In pseudoc syntax, the register names do not feature a '%' prefix.
> +   Additionally, the code 'w' denotes that the register should be printed
> +   as wN instead of rN, where N is the register number, but only when the
> +   value stored in the operand OP is 32-bit wide.  */
> +static void
> +bpf_print_register (FILE *file, rtx op, int code)
> +{
> +  if(asm_dialect == ASM_NORMAL)
> +fprintf (file, "%s", reg_names[REGNO (op)]);
> +  else
> +{
> +  if (code == 'w' && GET_MODE (op) == SImode)
> + {
> +   if (REGNO (op) == BPF_FP)
> + fprintf (file, "w10");
> +   else
> + fprintf (file, "w%s", reg_names[REGNO (op)]+2);
> + }
> +  else
> + {
> +   if (REGNO (op) == BPF_FP)
> + fprintf (file, "r10");
> +   else
> + fprintf (file, "%s", reg_names[REGNO (op)]+1);
> + }
> +}
> +}
> +
>  /* Print an instruction operand.  This function is called in the macro
> PRINT_OPERAND defined in bpf.h */
>  
>  void
> -bpf_print_operand (FILE *file, rtx op, int code ATTRIBUTE_UNUSED)
> +bpf_print_operand (FILE *file, rtx op, int code)
>  {
>switch (GET_CODE (op))
>  {
>  case REG:
> -  fprintf (file, "%s", reg_names[REGNO (op)]);
> +  bpf_print_register (file, op, code);
>break;
>  case MEM:
>output_address (GET_MODE (op), XEXP (op, 0));
> @@ -936,7 +967,9 @@ bpf_print_operand_address (FILE *file, rtx addr)
>switch (GET_CODE (addr))
>  {
>  case REG:
> -  fprintf (file, "[%s+0]", reg_names[REGNO (addr)]);
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
> +  bpf_print_register (file, addr, 0);
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "+0]" : "+0)");
>break;
>  case PLUS:
>{
> @@ -945,9 +978,11 @@ bpf_print_operand_address (FILE *file, rtx addr)
>  
>   if (GET_CODE (op0) == REG && GET_CODE (op1) == CONST_INT)
> {
> - fprintf (file, "[%s+", reg_names[REGNO (op0)]);
> + fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
> + bpf_print_register (file, op0, 0);
> + fprintf (file, "+");
>   output_addr_const (file, op1);
> - fputs ("]", file);
> + fprintf (file, asm_dialect == ASM_NORMAL ? "]" : ")");
> }
>   else
> fatal_insn ("invalid address in operand", addr);
> @@ -1816,7 +1851,6 @@ handle_attr_preserve (function *fn)
>  }
>  }
>  
> -
>  /* This pass finds accesses to structures marked with the BPF target 
> attribute
> __attribute__((preserve_access_index)). For every such access, a CO-RE
> relocation record is generated, to be output in the .BTF.ext section.  */
> diff --git a/gcc/config/bpf/bpf.h b/gcc/config/bpf/bpf.h
> index 344aca02d1bb..9561bf59b800 100644
> --- a/gcc/config/bpf/bpf.h

Re: [PATCH] bpf: pseudo-c assembly dialect support

2023-07-21 Thread Jose E. Marchesi via Gcc-patches


Hello Cuper.

Thanks for the patch.

We will need an update for the "eBPF Options" section in the GCC manual,
documenting -masm=@var{dialect} and the supported values.  Can you
please add it and re-submit?


> Hi everyone,
>
> Looking forward to all your reviews.
>
> Best regards,
> Cupertino
>
> Add support for the pseudo-C BPF assembly dialect, which is already
> supported by clang and widely used in the Linux kernel.
>
> gcc/ChangeLog:
>
>   * config/bpf/bpf.opt: Added option -masm=.
>   * config/bpf/bpf-opts.h: Likewise.
>   * config/bpf/bpf.cc: Changed it to conform with new pseudoc
> dialect support.
>   * config/bpf/bpf.h: Likewise.
>   * config/bpf/bpf.md: Added pseudo-c templates.
> ---
>  gcc/config/bpf/bpf-opts.h |  6 +++
>  gcc/config/bpf/bpf.cc | 46 ---
>  gcc/config/bpf/bpf.h  |  5 +-
>  gcc/config/bpf/bpf.md | 97 ---
>  gcc/config/bpf/bpf.opt| 14 ++
>  5 files changed, 114 insertions(+), 54 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf-opts.h b/gcc/config/bpf/bpf-opts.h
> index 8282351cf045..92db01ec4d54 100644
> --- a/gcc/config/bpf/bpf-opts.h
> +++ b/gcc/config/bpf/bpf-opts.h
> @@ -60,4 +60,10 @@ enum bpf_isa_version
>ISA_V3,
>  };
>  
> +enum bpf_asm_dialect
> +{
> +  ASM_NORMAL,
> +  ASM_PSEUDOC
> +};
> +
>  #endif /* ! BPF_OPTS_H */
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index e0324e1e0e08..1d3936871d60 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -873,16 +873,47 @@ bpf_output_call (rtx target)
>return "";
>  }
>  
> +/* Print register name according to assembly dialect.
> +   In normal syntax registers are printed like %rN where N is the
> +   register number.
> +   In pseudoc syntax, the register names do not feature a '%' prefix.
> +   Additionally, the code 'w' denotes that the register should be printed
> +   as wN instead of rN, where N is the register number, but only when the
> +   value stored in the operand OP is 32-bit wide.  */
> +static void
> +bpf_print_register (FILE *file, rtx op, int code)
> +{
> +  if(asm_dialect == ASM_NORMAL)
> +fprintf (file, "%s", reg_names[REGNO (op)]);
> +  else
> +{
> +  if (code == 'w' && GET_MODE (op) == SImode)
> + {
> +   if (REGNO (op) == BPF_FP)
> + fprintf (file, "w10");
> +   else
> + fprintf (file, "w%s", reg_names[REGNO (op)]+2);
> + }
> +  else
> + {
> +   if (REGNO (op) == BPF_FP)
> + fprintf (file, "r10");
> +   else
> + fprintf (file, "%s", reg_names[REGNO (op)]+1);
> + }
> +}
> +}
> +
>  /* Print an instruction operand.  This function is called in the macro
> PRINT_OPERAND defined in bpf.h */
>  
>  void
> -bpf_print_operand (FILE *file, rtx op, int code ATTRIBUTE_UNUSED)
> +bpf_print_operand (FILE *file, rtx op, int code)
>  {
>switch (GET_CODE (op))
>  {
>  case REG:
> -  fprintf (file, "%s", reg_names[REGNO (op)]);
> +  bpf_print_register (file, op, code);
>break;
>  case MEM:
>output_address (GET_MODE (op), XEXP (op, 0));
> @@ -936,7 +967,9 @@ bpf_print_operand_address (FILE *file, rtx addr)
>switch (GET_CODE (addr))
>  {
>  case REG:
> -  fprintf (file, "[%s+0]", reg_names[REGNO (addr)]);
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
> +  bpf_print_register (file, addr, 0);
> +  fprintf (file, asm_dialect == ASM_NORMAL ? "+0]" : "+0)");
>break;
>  case PLUS:
>{
> @@ -945,9 +978,11 @@ bpf_print_operand_address (FILE *file, rtx addr)
>  
>   if (GET_CODE (op0) == REG && GET_CODE (op1) == CONST_INT)
> {
> - fprintf (file, "[%s+", reg_names[REGNO (op0)]);
> + fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
> + bpf_print_register (file, op0, 0);
> + fprintf (file, "+");
>   output_addr_const (file, op1);
> - fputs ("]", file);
> + fprintf (file, asm_dialect == ASM_NORMAL ? "]" : ")");
> }
>   else
> fatal_insn ("invalid address in operand", addr);
> @@ -1816,7 +1851,6 @@ handle_attr_preserve (function *fn)
>  }
>  }
>  
> -
>  /* This pass finds accesses to structures marked with the BPF target 
> attribute
> __attribute__((preserve_access_index)). For every such access, a CO-RE
> relocation record is generated, to be output in the .BTF.ext section.  */
> diff --git a/gcc/config/bpf/bpf.h b/gcc/config/bpf/bpf.h
> index 344aca02d1bb..9561bf59b800 100644
> --- a/gcc/config/bpf/bpf.h
> +++ b/gcc/config/bpf/bpf.h
> @@ -22,7 +22,8 @@
>  
>  /**** Controlling the Compilation Driver.  */
>  
> -#define ASM_SPEC "%{mbig-endian:-EB} %{!mbig-endian:-EL} %{mxbpf:-mxbpf}"
> +#define ASM_SPEC "%{mbig-endian:-EB} %{!mbig-endian:-EL} %{mxbpf:-mxbpf} " \
> +  "%{masm=pseudoc:-mdialect=pseudoc}"
>  #define LINK_SPEC "%{mbig-endian:-EB} %{!mbig-endian:-EL}"
>  #define LIB_SPEC ""
>  #define STARTFILE_SPEC ""
> @@ -503,4 

[COMMITTED] bpf: enable instruction scheduling

2023-07-14 Thread Jose E. Marchesi via Gcc-patches


commit 53d12ecd624ec901d8449cfa1917f6f90e910927 (HEAD -> master, origin/master, 
origin/HEAD)
Author: Jose E. Marchesi 
Date:   Fri Jul 14 13:54:06 2023 +0200

bpf: enable instruction scheduling

This patch adds a dummy FSM to bpf.md in order to get INSN_SCHEDULING
    defined.  If the latter is not defined, the `combine' pass generates
paradoxical subregs of mems, which seems to then be mishandled by LRA,
resulting in invalid code.

Tested in bpf-unknown-none.

gcc/ChangeLog:

2023-07-14  Jose E. Marchesi  

PR target/110657
* config/bpf/bpf.md: Enable instruction scheduling.

diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
index f6be0a21234..329f62f55c3 100644
--- a/gcc/config/bpf/bpf.md
+++ b/gcc/config/bpf/bpf.md
@@ -20,6 +20,17 @@
 (include "predicates.md")
 (include "constraints.md")
 
+;;;; Instruction Scheduler FSM
+
+;; This is just to get INSN_SCHEDULING defined, so that combine does
+;; not make paradoxical subregs of memory.  These subregs seems to
+;; confuse LRA that ends generating wrong instructions.
+
+(define_automaton "frob")
+(define_cpu_unit "frob_unit" "frob")
+(define_insn_reservation "frobnicator" 814
+  (const_int 0) "frob_unit")
+
 ;;;; Unspecs
 
 (define_c_enum "unspec" [


Re: [PATCH 0/9] Add btf_decl_tag C attribute

2023-07-12 Thread Jose E. Marchesi via Gcc-patches


> On Wed, Jul 12, 2023 at 2:44 PM Jose E. Marchesi
>  wrote:
>>
>>
>> [Added Eduard Zingerman in CC, who is implementing this same feature in
>>  clang/llvm and also the consumer component in the kernel (pahole).]
>>
>> Hi Richard.
>>
>> > On Tue, Jul 11, 2023 at 11:58 PM David Faust via Gcc-patches
>> >  wrote:
>> >>
>> >> Hello,
>> >>
>> >> This series adds support for a new attribute, "btf_decl_tag" in GCC.
>> >> The same attribute is already supported in clang, and is used by various
>> >> components of the BPF ecosystem.
>> >>
>> >> The purpose of the attribute is to allow to associate (to "tag")
>> >> declarations with arbitrary string annotations, which are emitted into
>> >> debugging information (DWARF and/or BTF) to facilitate post-compilation
>> >> analysis (the motivating use case being the Linux kernel BPF verifier).
>> >> Multiple tags are allowed on the same declaration.
>> >>
>> >> These strings are not interpreted by the compiler, and the attribute
>> >> itself has no effect on generated code, other than to produce additional
>> >> DWARF DIEs and/or BTF records conveying the annotations.
>> >>
>> >> This entails:
>> >>
>> >> - A new C-language-level attribute which allows to associate (to "tag")
>> >>   particular declarations with arbitrary strings.
>> >>
>> >> - The conveyance of that information in DWARF in the form of a new DIE,
>> >>   DW_TAG_GNU_annotation, with tag number (0x6000) and format matching
>> >>   that of the DW_TAG_LLVM_annotation extension supported in LLVM for
>> >>   the same purpose. These DIEs are already supported by BPF tooling,
>> >>   such as pahole.
>> >>
>> >> - The conveyance of that information in BTF debug info in the form of
>> >>   BTF_KIND_DECL_TAG records. These records are already supported by
>> >>   LLVM and other tools in the eBPF ecosystem, such as the Linux kernel
>> >>   eBPF verifier.
>> >>
>> >>
>> >> Background
>> >> ==========
>> >>
>> >> The purpose of these tags is to convey additional semantic information
>> >> to post-compilation consumers, in particular the Linux kernel eBPF
>> >> verifier. The verifier can make use of that information while analyzing
>> >> a BPF program to aid in determining whether to allow or reject the
>> >> program to be run. More background on these tags can be found in the
>> >> early support for them in the kernel here [1] and [2].
>> >>
>> >> The "btf_decl_tag" attribute is half the story; the other half is a
>> >> sibling attribute "btf_type_tag" which serves the same purpose but
>> >> applies to types. Support for btf_type_tag will come in a separate
>> >> patch series, since it is impacted by GCC bug 110439 which needs to be
>> >> addressed first.
>> >>
>> >> I submitted an initial version of this work (including btf_type_tag)
>> >> last spring [3], however at the time there were some open questions
>> >> about the behavior of the btf_type_tag attribute and issues with its
>> >> implementation. Since then we have clarified these details and agreed
>> >> to solutions with the BPF community and LLVM BPF folks.
>> >>
>> >> The main motivation for emitting the tags in DWARF is that the Linux
>> >> kernel generates its BTF information via pahole, using DWARF as a source:
>> >>
>> >>     +--------+  BTF                          BTF   +----------+
>> >>     | pahole |---> vmlinux.btf ------------------->| verifier |
>> >>     +--------+                                     +----------+
>> >>         ^                                               ^
>> >>         |                                               |
>> >>   DWARF |                                           BTF |
>> >>         |                                               |
>> >>       vmlinux                                    +-------------+
>> >>       module1.ko                                 | BPF program |
>> >>       module2.ko                                 +-------------+
>> >>         ...
>> >>
>> >> This is because:
>> >>
>> >> a)  pahole adds additional kernel-specific information into the
>> >> produced BTF based on additional analysis of kernel objects.
>> >>
>> >> b)  Unlike GCC, LLVM will only generate BTF for BPF programs.
>> >>
>> >> c)  GCC can generate BTF for whatever target with -gbtf, but there is no
>> >> support for linking/deduplicating BTF in the linker.
>> >>
>> >> In the scenario above, the verifier needs access to the pointer tags of
>> >> both the kernel types/declarations (conveyed in the DWARF and translated
>> >> to BTF by pahole) and those of the BPF program (available directly in 
>> >> BTF).
>> >>
>> >>
>> >> DWARF Representation
>> >> ====================
>> >>
>> >> As noted above, btf_decl_tag is represented in DWARF via a new DIE
>> >> DW_TAG_GNU_annotation, with identical format to the LLVM DWARF
>> >> extension DW_TAG_LLVM_annotation serving the same purpose. The DIE has
>> >> the following format:
>> >>
>> >>   DW_TAG_GNU_annotation (0x6000)
>> >> DW_AT_name: "btf_decl_tag"
>> >> DW_AT_const_value: 
>> >>
>> >> These DIEs are placed in the DWARF tree as 

Re: [PATCH 0/9] Add btf_decl_tag C attribute

2023-07-12 Thread Jose E. Marchesi via Gcc-patches


[Added Eduard Zingerman in CC, who is implementing this same feature in
 clang/llvm and also the consumer component in the kernel (pahole).]

Hi Richard.

> On Tue, Jul 11, 2023 at 11:58 PM David Faust via Gcc-patches
>  wrote:
>>
>> Hello,
>>
>> This series adds support for a new attribute, "btf_decl_tag" in GCC.
>> The same attribute is already supported in clang, and is used by various
>> components of the BPF ecosystem.
>>
>> The purpose of the attribute is to allow to associate (to "tag")
>> declarations with arbitrary string annotations, which are emitted into
>> debugging information (DWARF and/or BTF) to facilitate post-compilation
>> analysis (the motivating use case being the Linux kernel BPF verifier).
>> Multiple tags are allowed on the same declaration.
>>
>> These strings are not interpreted by the compiler, and the attribute
>> itself has no effect on generated code, other than to produce additional
>> DWARF DIEs and/or BTF records conveying the annotations.
>>
>> This entails:
>>
>> - A new C-language-level attribute which allows to associate (to "tag")
>>   particular declarations with arbitrary strings.
>>
>> - The conveyance of that information in DWARF in the form of a new DIE,
>>   DW_TAG_GNU_annotation, with tag number (0x6000) and format matching
>>   that of the DW_TAG_LLVM_annotation extension supported in LLVM for
>>   the same purpose. These DIEs are already supported by BPF tooling,
>>   such as pahole.
>>
>> - The conveyance of that information in BTF debug info in the form of
>>   BTF_KIND_DECL_TAG records. These records are already supported by
>>   LLVM and other tools in the eBPF ecosystem, such as the Linux kernel
>>   eBPF verifier.
>>
>>
>> Background
>> ==========
>>
>> The purpose of these tags is to convey additional semantic information
>> to post-compilation consumers, in particular the Linux kernel eBPF
>> verifier. The verifier can make use of that information while analyzing
>> a BPF program to aid in determining whether to allow or reject the
>> program to be run. More background on these tags can be found in the
>> early support for them in the kernel here [1] and [2].
>>
>> The "btf_decl_tag" attribute is half the story; the other half is a
>> sibling attribute "btf_type_tag" which serves the same purpose but
>> applies to types. Support for btf_type_tag will come in a separate
>> patch series, since it is impacted by GCC bug 110439 which needs to be
>> addressed first.
>>
>> I submitted an initial version of this work (including btf_type_tag)
>> last spring [3], however at the time there were some open questions
>> about the behavior of the btf_type_tag attribute and issues with its
>> implementation. Since then we have clarified these details and agreed
>> to solutions with the BPF community and LLVM BPF folks.
>>
>> The main motivation for emitting the tags in DWARF is that the Linux
>> kernel generates its BTF information via pahole, using DWARF as a source:
>>
>>     +--------+  BTF                          BTF   +----------+
>>     | pahole |---> vmlinux.btf ------------------->| verifier |
>>     +--------+                                     +----------+
>>         ^                                               ^
>>         |                                               |
>>   DWARF |                                           BTF |
>>         |                                               |
>>       vmlinux                                    +-------------+
>>       module1.ko                                 | BPF program |
>>       module2.ko                                 +-------------+
>>         ...
>>
>> This is because:
>>
>> a)  pahole adds additional kernel-specific information into the
>> produced BTF based on additional analysis of kernel objects.
>>
>> b)  Unlike GCC, LLVM will only generate BTF for BPF programs.
>>
>> c)  GCC can generate BTF for whatever target with -gbtf, but there is no
>> support for linking/deduplicating BTF in the linker.
>>
>> In the scenario above, the verifier needs access to the pointer tags of
>> both the kernel types/declarations (conveyed in the DWARF and translated
>> to BTF by pahole) and those of the BPF program (available directly in BTF).
>>
>>
>> DWARF Representation
>> ====================
>>
>> As noted above, btf_decl_tag is represented in DWARF via a new DIE
>> DW_TAG_GNU_annotation, with identical format to the LLVM DWARF
>> extension DW_TAG_LLVM_annotation serving the same purpose. The DIE has
>> the following format:
>>
>>   DW_TAG_GNU_annotation (0x6000)
>> DW_AT_name: "btf_decl_tag"
>> DW_AT_const_value: 
>>
>> These DIEs are placed in the DWARF tree as children of the DIE for the
>> appropriate declaration, and one such DIE is created for each occurrence
>> of the btf_decl_tag attribute on a declaration.
>>
>> For example:
>>
>>   const int * c __attribute__((btf_decl_tag ("__c"), btf_decl_tag 
>> ("devicemem")));
>>
>> This declaration produces the following DWARF:
>>
>>  <1><1e>: Abbrev Number: 2 (DW_TAG_variable)
>> <1f>   DW_AT_name  

Re: [committed] Docs: Fix formatting issues in BPF built-ins documentation

2023-03-20 Thread Jose E. Marchesi via Gcc-patches


Hi Sandra.

> This section of the GCC manual had some issues with lines in the
> example overflowing into the right margin of the PDF-format document,
> but as I looked at it more closely I also saw that it was full of
> missing or incorrect Texinfo markup, too.  I've cleaned it up thusly.

Thank you so much for fixing these issues in the manual.
Very much appreciated :)

>
> -Sandra
>
> commit 7ffbc74c8c202a16a5e987134f03c2359c531f0e
> Author: Sandra Loosemore 
> Date:   Thu Mar 16 21:07:18 2023 +
>
> Docs: Fix formatting issues in BPF built-ins documentation.
> 
> gcc/ChangeLog:
> * doc/extend.texi (BPF Built-in Functions): Fix numerous markup
> issues.  Add more line breaks to example so it doesn't overflow
> the margins.
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 39d45df8d89..8ecd9611201 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -15715,23 +15715,23 @@ void __builtin_bfin_ssync (void);
>  
>  The following built-in functions are available for eBPF targets.
>  
> -@deftypefn {Built-in Function} unsigned long long __builtin_bpf_load_byte 
> (unsigned long long @var{offset})
> +@deftypefn {Built-in Function} {unsigned long long} __builtin_bpf_load_byte 
> (unsigned long long @var{offset})
>  Load a byte from the @code{struct sk_buff} packet data pointed by the 
> register @code{%r6} and return it.
>  @end deftypefn
>  
> -@deftypefn {Built-in Function} unsigned long long __builtin_bpf_load_half 
> (unsigned long long @var{offset})
> -Load 16-bits from the @code{struct sk_buff} packet data pointed by the 
> register @code{%r6} and return it.
> +@deftypefn {Built-in Function} {unsigned long long} __builtin_bpf_load_half 
> (unsigned long long @var{offset})
> +Load 16 bits from the @code{struct sk_buff} packet data pointed by the 
> register @code{%r6} and return it.
>  @end deftypefn
>  
> -@deftypefn {Built-in Function} unsigned long long __builtin_bpf_load_word 
> (unsigned long long @var{offset})
> -Load 32-bits from the @code{struct sk_buff} packet data pointed by the 
> register @code{%r6} and return it.
> +@deftypefn {Built-in Function} {unsigned long long} __builtin_bpf_load_word 
> (unsigned long long @var{offset})
> +Load 32 bits from the @code{struct sk_buff} packet data pointed by the 
> register @code{%r6} and return it.
>  @end deftypefn
>  
> -@deftypefn {Built-in Function} void * __builtin_preserve_access_index 
> (@var{expr})
> +@deftypefn {Built-in Function} {void *} __builtin_preserve_access_index 
> (@var{expr})
>  BPF Compile Once-Run Everywhere (CO-RE) support. Instruct GCC to generate 
> CO-RE relocation records for any accesses to aggregate data structures 
> (struct, union, array types) in @var{expr}. This builtin is otherwise 
> transparent, the return value is whatever @var{expr} evaluates to. It is also 
> overloaded: @var{expr} may be of any type (not necessarily a pointer), the 
> return type is the same. Has no effect if @code{-mco-re} is not in effect 
> (either specified or implied).
>  @end deftypefn
>  
> -@deftypefn {Built-in Function} unsigned int __builtin_preserve_field_info 
> (@var{expr}, unsigned int @var{kind})
> +@deftypefn {Built-in Function} {unsigned int} __builtin_preserve_field_info 
> (@var{expr}, unsigned int @var{kind})
>  BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to
>  extract information to aid in struct/union relocations.  @var{expr} is
>  an access to a field of a struct or union. Depending on @var{kind}, different
> @@ -15739,15 +15739,15 @@ information is returned to the program. A CO-RE 
> relocation for the access in
>  @var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect.
>  
>  The following values are supported for @var{kind}:
> -@table @var
> +@table @code
>  @item FIELD_BYTE_OFFSET = 0
>  The returned value is the offset, in bytes, of the field from the
> -beginning of the containing structure. For bitfields, the byte offset
> +beginning of the containing structure. For bit-fields, this is the byte 
> offset
>  of the containing word.
>  
>  @item FIELD_BYTE_SIZE = 1
> -The returned value is the size, in bytes, of the field. For bitfields,
> -the size in bytes of the containing word.
> +The returned value is the size, in bytes, of the field. For bit-fields,
> +this is the size in bytes of the containing word.
>  
>  @item FIELD_EXISTENCE = 2
>  The returned value is 1 if the field exists, 0 otherwise. Always 1 at
> @@ -15759,25 +15759,26 @@ The returned value is 1 if the field is signed, 0 
> otherwise.
>  @item FIELD_LSHIFT_U64 = 4
>  @itemx FIELD_RSHIFT_U64 = 5
>  The returned value is the number of bits of left- or right-shifting
> -respectively needed in order to recover the original value of the field,
> -after it has been loaded by a read of FIELD_BYTE_SIZE bytes into an
> -unsigned 64-bit value. Primarily useful for reading bitfield values
> -from structures which may change between 

Re: [patch] bpf: Fix double whitespace warning

2023-02-15 Thread Jose E. Marchesi via Gcc-patches


> Hi!
>
> Since a recent commit, the BPF target produces a new warning due to
> two consecutive non-quoted spaces in a message. This'll fix it:
>
> gcc/
>   * config/bpf/bpf.cc (bpf_option_override): Fix doubled space.
>
>
> Ok?

OK.  Thanks for the patch.

(Sorry I didn't fix this when you first reported it.  My TODO list is
long atm :/)

> MfG, JBG
>
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index b268801d00c..d8693f8cfbe 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -258,7 +258,7 @@ bpf_option_override (void)
>  {
>inform (input_location,
>"%<-fstack-protector%> does not work "
> -  " on this architecture");
> +   "on this architecture");
>flag_stack_protect = 0;
>  }
>  }


Re: [PATCH] bpf: fix memory constraint of ldx/stx instructions [PR108790]

2023-02-14 Thread Jose E. Marchesi via Gcc-patches


Hi David.

> In some cases where the target memory address for an ldx or stx
> instruction could be reduced to a constant, GCC could emit a malformed
> instruction like:
>
> ldxdw %r0,0
>
> Rather than the expected form:
>
> ldxdw %rX, [%rY + OFFSET]
>
> This is due to the constraint allowing a const_int operand, which the
> output templates do not handle.
>
> Fix it by introducing a new memory constraint for the appropriate
> operands of these instructions, which is identical to 'm' except that
> it does not accept const_int.
>
> Tested with bpf-unknown-none, no known regressions.
> OK?

OK.  Thanks for the patch.

> Thanks.
>
> gcc/
>
>   PR target/108790
>   * config/bpf/constraints.md (q): New memory constraint.
>   * config/bpf/bpf.md (zero_extendhidi2): Use it here.
>   (zero_extendqidi2): Likewise.
>   (zero_extendsidi2): Likewise.
>   (*mov): Likewise.
>
> gcc/testsuite/
>
>   PR target/108790
>   * gcc.target/bpf/ldxdw.c: New test.
> ---
>  gcc/config/bpf/bpf.md| 10 +-
>  gcc/config/bpf/constraints.md| 11 +++
>  gcc/testsuite/gcc.target/bpf/ldxdw.c | 12 
>  3 files changed, 28 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/ldxdw.c
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index d9af98384ef..f6be0a21234 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -242,7 +242,7 @@ (define_insn "xor3"
>  
>  (define_insn "zero_extendhidi2"
>[(set (match_operand:DI 0 "register_operand" "=r,r,r")
> - (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "0,r,m")))]
> + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "0,r,q")))]
>""
>"@
> and\t%0,0xffff
> @@ -252,7 +252,7 @@ (define_insn "zero_extendhidi2"
>  
>  (define_insn "zero_extendqidi2"
>[(set (match_operand:DI 0 "register_operand" "=r,r,r")
> - (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "0,r,m")))]
> + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "0,r,q")))]
>""
>"@
> and\t%0,0xff
> @@ -263,7 +263,7 @@ (define_insn "zero_extendqidi2"
>  (define_insn "zero_extendsidi2"
>[(set (match_operand:DI 0 "register_operand" "=r,r")
>   (zero_extend:DI
> -   (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
> +   (match_operand:SI 1 "nonimmediate_operand" "r,q")))]
>""
>"@
> * return bpf_has_alu32 ? \"mov32\t%0,%1\" : 
> \"mov\t%0,%1\;and\t%0,0xffffffff\";
> @@ -302,8 +302,8 @@ (define_expand "mov"
>  }")
>  
>  (define_insn "*mov"
> -  [(set (match_operand:MM 0 "nonimmediate_operand" "=r, r,r,m,m")
> -(match_operand:MM 1 "mov_src_operand"  " m,rI,B,r,I"))]
> +  [(set (match_operand:MM 0 "nonimmediate_operand" "=r, r,r,q,q")
> +(match_operand:MM 1 "mov_src_operand"  " q,rI,B,r,I"))]
>""
>"@
> ldx\t%0,%1
> diff --git a/gcc/config/bpf/constraints.md b/gcc/config/bpf/constraints.md
> index c8a65cfcddb..33f9177b8eb 100644
> --- a/gcc/config/bpf/constraints.md
> +++ b/gcc/config/bpf/constraints.md
> @@ -29,3 +29,14 @@ (define_constraint "B"
>  (define_constraint "S"
>"A constant call address."
>(match_code "const,symbol_ref,label_ref,const_int"))
> +
> +;;
> +;; Memory constraints.
> +;;
> +
> +; Just like 'm' but disallows const_int.
> +; Used for ldx[b,h,w,dw] and stx[b,h,w,dw] instructions.
> +(define_memory_constraint "q"
> +  "Memory reference which is not a constant integer."
> +  (and (match_code "mem")
> +   (match_test "GET_CODE(XEXP(op, 0)) != CONST_INT")))
> diff --git a/gcc/testsuite/gcc.target/bpf/ldxdw.c 
> b/gcc/testsuite/gcc.target/bpf/ldxdw.c
> new file mode 100644
> index 00000000000..0985ea3e6ac
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/ldxdw.c
> @@ -0,0 +1,12 @@
> +/* Verify that we do not generate a malformed ldxdw instruction
> +   with a constant instead of register + offset.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +/* { dg-final { scan-assembler-times "ldxdw\t%r.,\\\[%r.+0\\\]" 1 } } */
> +/* { dg-final { scan-assembler-not "ldxdw\t%r.,\[0-9\]+" } } */
> +
> +unsigned long long test () {
> +  return *((unsigned long long *) 0x4000);
> +}


Re: [PATCH] expr.cc: avoid unexpected side effects in expand_expr_divmod optimization

2023-01-30 Thread Jose E. Marchesi via Gcc-patches


> On Thu, Dec 8, 2022 at 2:56 AM Jose E. Marchesi via Gcc-patches
>  wrote:
>>
>> The expand_expr_divmod function in expr.cc attempts to optimize cases
>> where both arguments of a division/modulus are known to be positive
>> when interpreted as signed.  In these cases, both signed division and
>> unsigned division will yield the same value, and therefore the
>> cheapest option can be used.
>
> I suspect this issue is also the same as PR 48783 .

Yeah I kinda dropped the ball here.

Will look into Jeff's suggestions on how to fix this without exhausting
the CALL_INSN bits.

> Thanks,
> Andrew
>
>>
>> In order to determine what is the cheaper option in the current
>> target, expand_expr_divmod actually expands both a signed divmod and
>> an unsigned divmod using local "sequences":
>>
>>   start_sequence ();
>>   ...
>>   expand_divmod (... signed ...);
>>   ...
>>   end_sequence ();
>>
>>   start_sequence ();
>>   ...
>>   expand_divmod (... unsigned ...);
>>   ...
>>   end_sequence ();
>>
>> And then compares the cost of each generated sequence, choosing the
>> best one.  Finally, it emits the selected expanded sequence and
>> returns the rtx with the result.
>>
>> This approach has a caveat.  Some targets do not provide all of the
>> division/modulus instructions.  BPF, for example, provides unsigned
>> division/modulus, but not signed division/modulus.
>>
>> In these cases, the sequences tried by expand_divmod can contain funcalls.
>> For example, in BPF:
>>
>>   start_sequence ();
>>   ...
>>   expand_divmod (... signed ...); -> This generates funcall to __divdi3
>>   ...
>>   end_sequence ();
>>
>>   start_sequence ();
>>   ...
>>   expand_divmod (... unsigned ...); -> This generates direct `div' insn.
>>   ...
>>   end_sequence ();
>>
>> The problem is that when a funcall is expanded, an accompanying global
>> symbol definition is written in the output stream:
>>
>>   .global __divdi3
>>
>> And this symbol definition remains in the compiled assembly file, even
>> if the sequence using the direct `div' instruction above is used.
>>
>> This is particularly bad in BPF, because the kernel bpf loader chokes
>> on the spurious symbol __divdi3 and makes the resulting BPF object
>> unloadable (note that BPF objects are not linked before processed by
>> the kernel.)
>>
>> In order to fix this, this patch modifies expand_expr_divmod in the
>> following way:
>>
>> - When trying each sequence (signed, unsigned) the expand_divmod calls
>>   are told to _not_ use libcalls if everything else fails.  This is
>>   done by passing OPTAB_WIDEN as the `methods' argument.  (Before it
>>   was using the default value OPTAB_LIB_WIDEN.)
>>
>> - If any of the tried expanded sequences contain a funcall, then the
>>   optimization is not attempted.
>>
>> A couple of BPF tests are also added to make sure this doesn't break
>> at any point in the future.
>>
>> Tested in bpf-unknown-none and x86_64-linux-gnu.
>> Regtested in x86_64-linux-gnu.  No regressions.
>>
>> gcc/ChangeLog
>>
>> * expr.cc (expand_expr_divmod): Avoid side-effects of trying
>> sequences involving funcalls in optimization.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/bpf/divmod-funcall-1.c: New test.
>> * gcc.target/bpf/divmod-funcall-2.c: Likewise.
>> ---
>>  gcc/expr.cc   | 44 +++
>>  .../gcc.target/bpf/divmod-funcall-1.c |  8 
>>  .../gcc.target/bpf/divmod-funcall-2.c |  8 
>>  3 files changed, 41 insertions(+), 19 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-funcall-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-funcall-2.c
>>
>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>> index d9407432ea5..4d4be5d7bda 100644
>> --- a/gcc/expr.cc
>> +++ b/gcc/expr.cc
>> @@ -9168,32 +9168,38 @@ expand_expr_divmod (tree_code code, machine_mode 
>> mode, tree treeop0,
>>do_pending_stack_adjust ();
>>start_sequence ();
>>rtx uns_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
>> -  op0, op1, target, 1);
>> +  op0, op1, target, 1, OPTAB_WIDEN);
>>rtx_insn *uns_insns = get_insns ();
>>end_sequence ();
>>star

[COMMITTED] bpf: disable -fstack-protector in BPF

2023-01-17 Thread Jose E. Marchesi via Gcc-patches
The stack protector is not supported in BPF.  This patch disables
-fstack-protector in bpf-* targets, along with the emission of a note
indicating that the feature is not supported in this platform.

Regtested in bpf-unknown-none.

gcc/ChangeLog:

* config/bpf/bpf.cc (bpf_option_override): Disable
-fstack-protector.
---
 gcc/config/bpf/bpf.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 576a1fe8eab..b268801d00c 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -253,6 +253,14 @@ bpf_option_override (void)
   if (bpf_has_jmp32 == -1)
 bpf_has_jmp32 = (bpf_isa >= ISA_V3);
 
+  /* Disable -fstack-protector as it is not supported in BPF.  */
+  if (flag_stack_protect)
+{
+  inform (input_location,
+  "%<-fstack-protector%> does not work "
+  " on this architecture");
+  flag_stack_protect = 0;
+}
 }
 
 #undef TARGET_OPTION_OVERRIDE
-- 
2.30.2



Re: [PATCH,WWWDOCS] htdocs: add an Atom feed for GCC news

2023-01-17 Thread Jose E. Marchesi via Gcc-patches


> On Wed, 11 Jan 2023, Thomas Schwinge wrote:
>> On 2022-12-23T10:50:13+0100, "Jose E. Marchesi via Gcc-patches"
>>  wrote:
>>> This patch adds an Atom feed for GCC news, which can then be easily 
>>> aggregated in other sites, such as the GNU planet 
>>> (https://planet.gnu.org).
>> I absolutely agree that providing such an RSS feed is a good thing
>> (..., and that we generally should make better use of our News section,
>> and other "PR"...) -- but I'm less convinced by the prospect of manually
>> editing the RSS 'news.xml' file, duplicating in a (potentially) different
>> format what we've got in the HTML News section.  :-|
>
> Agreed, yet...
>
>> Ideally, there'd be some simple files for News items (Markdown, or
>> similar), which are then converted into HTML News as well as RSS feed.
>> Obviously, there needs to be some consensus on what to use, and somebody
>> needs to set up the corresponding machinery...
>
> ...how are we going to get to that?
>
>
> On Thu, 12 Jan 2023, Jose E. Marchesi wrote:
>> I would like to point out that I have maintained these kinds of feeds for
>> my own sites for years, and that in my humble personal experience unless
>> there are a lot of updates, like more than a couple of new entries per
>> month, any automated schema would be overkill, prone to rot, and not
>> really worth the effort.
>
> That is a bit of a concern. I'd love having a single source that feeds 
> both the News section on our main page, rolls over into news.html, and
> also feeds the Atom feed (no pun intended).
>
> On the other hand, with less than a dozen entries per year, even if 
> manually converting from one to the other takes 5 minutes, creating 
> such a machinery wouldn't amortize anytime soon.

Yeah I guess it all depends on how much the news section is used.

I personally think that it would be beneficial for the different GCC
projects (front-ends, back-ends, etc) to be a little more vocal, public
wise.  Releasing news items more often may help with that.

Of course one could argue that making it easier to add news to the
system (without having to manually rotate the .html file, add to the
feed if desired, etc) would help with that.  And they would probably be
right :D

>> I strongly suggest to not overengineer here [and nowhere else :)]
>
> I am tempted to agree (even if the engineer in me would prefer to avoid 
> duplication). Jose, might you be willing to help others create Atom feed
> entries?

Sure.  It is as easy as adding one of these things to the .xml file:


<item>
  <title>Rhhw Friday 16 March 2018 - Sunday 18 March 2018 @
  Frankfurt am Main</title>
  <link>http://jemarch.net/rhhw.html#16march2018</link>
  <description>
The Rabbit Herd will be meeting the weekend from 16 March to
18 March.
  </description>
  <pubDate>Mon, 12 March 2018 15:00:00 CET</pubDate>
</item>


To be sure nothing breaks we may run an XML validator on the server to
reject pushes that break the .xml file.  There must be an XML schema for
XML Atom feeds somewhere.

> What do others think?
>
> Gerald


Re: [PATCH] sched-deps: do not schedule pseudos across calls [PR108117]

2023-01-13 Thread Jose E. Marchesi via Gcc-patches


> On Fri, 23 Dec 2022, Jose E. Marchesi wrote:
>
>> > +1 for trying this FWIW.  There's still plenty of time to try an
>> > alternative solution if there are unexpected performance problems.
>> 
>> Let me see if Alexander's patch fixes the issue at hand (it must) and
>> will also do some regression testing.
>
> Hi, I'm not sure in whose court the ball is, but in the interest of moving
> things forward, here's the complete patch with the testcase. OK to
> apply?

Thanks for this.
We were actually on it, but of course busy with other stuff :)

>
> ---8<---
>
> From: Alexander Monakov 
> Date: Fri, 13 Jan 2023 21:04:02 +0300
> Subject: [PATCH] sched-deps: do not schedule pseudos across calls [PR108117]
>
> Scheduling across calls in the pre-RA scheduler is problematic: we do
> not take liveness info into account, and are thus prone to extending
> lifetime of a pseudo over the loop, requiring a callee-saved hardreg
> or causing a spill.
>
> If current function called a setjmp, lifting an assignment over a call
> may be incorrect if a longjmp would happen before the assignment.
>
> Thanks to Jose Marchesi for testing on AArch64.
>
> gcc/ChangeLog:
>
>   PR rtl-optimization/108117
>   PR rtl-optimization/108132
>   * sched-deps.cc (deps_analyze_insn): Do not schedule across
>   calls before reload.
>
> gcc/testsuite/ChangeLog:
>
>   PR rtl-optimization/108117
>   PR rtl-optimization/108132
>   * gcc.dg/pr108117.c: New test.
> ---
>  gcc/sched-deps.cc   |  9 -
>  gcc/testsuite/gcc.dg/pr108117.c | 30 ++
>  2 files changed, 38 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr108117.c
>
> diff --git a/gcc/sched-deps.cc b/gcc/sched-deps.cc
> index 948aa0c3b..5dc4fa4cd 100644
> --- a/gcc/sched-deps.cc
> +++ b/gcc/sched-deps.cc
> @@ -3688,7 +3688,14 @@ deps_analyze_insn (class deps_desc *deps, rtx_insn 
> *insn)
>  
>CANT_MOVE (insn) = 1;
>  
> -  if (find_reg_note (insn, REG_SETJMP, NULL))
> +  if (!reload_completed)
> + {
> +   /* Scheduling across calls may increase register pressure by extending
> +  live ranges of pseudos over the call.  Worse, in presence of setjmp
> +  it may incorrectly move up an assignment over a longjmp.  */
> +   reg_pending_barrier = MOVE_BARRIER;
> + }
> +  else if (find_reg_note (insn, REG_SETJMP, NULL))
>  {
>/* This is setjmp.  Assume that all registers, not just
>   hard registers, may be clobbered by this call.  */
> diff --git a/gcc/testsuite/gcc.dg/pr108117.c b/gcc/testsuite/gcc.dg/pr108117.c
> new file mode 100644
> index 0..ae151693e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108117.c
> @@ -0,0 +1,30 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target nonlocal_goto } */
> +/* { dg-options "-O2 -fschedule-insns" } */
> +
> +#include <stdio.h>
> +#include <setjmp.h>
> +
> +jmp_buf ex_buf;
> +
> +__attribute__((noipa))
> +void fn_throw(int x)
> +{
> +   if (x)
> +  longjmp(ex_buf, 1);
> +}
> +
> +int main(void)
> +{
> +int vb = 0; // NB: not volatile, not modified after setjmp
> +
> +if (!setjmp(ex_buf)) {
> +fn_throw(1);
> +vb = 1; // not reached in the abstract machine
> +}
> +
> +if (vb) {
> +printf("Failed, vb = %d!\n", vb);
> +return 1;
> +}
> +}


Re: [PATCH,WWWDOCS] htdocs: rotate news

2023-01-12 Thread Jose E. Marchesi via Gcc-patches


> On Fri, 23 Dec 2022, Jose E. Marchesi via Gcc-patches wrote:
>>  htdocs/index.html | 24 
>>  htdocs/news.html  | 24 
>>  2 files changed, 24 insertions(+), 24 deletions(-)
>
> Okay, thank you.
>
> And you can consider this kind of change preapproved. Or falling under 
> our "obvious rule". Whichever you prefer. :-)

Understood, thanks.
Pushed.


Re: [PATCH,WWWDOCS] htdocs: news: GCC BPF in Compiler Explorer

2023-01-12 Thread Jose E. Marchesi via Gcc-patches


> On Fri, 23 Dec 2022, Jose E. Marchesi via Gcc-patches wrote:
>> This patch adds an entry to the News section in index.html, announcing
>> the availability of a nightly build of bpf-unknown-none-gcc.
>
> Nice!
>
>> +https://godbolt.org;>GCC BPF in Compiler 
>> Explorer
>> + [2022-12-23]
>> +Support for a nightly build of the bpf-unknown-none-gcc compiler
>> +  has been contributed to Compiler Explorer (aka godbolt.org) by Marc
>> +  Poulhiès
>
> Usually I recommend active voice, something like "Compiler Explorer (aka 
> godbolt.org) now supports nightly builds of the bpf-unknown-none-gcc 
> compiler thanks to Marc Poulhiès", but your proposal is perfectly fine, 
> too.
>
> Which means only change if you like the alternative approach better 
> yourself; otherwise simply use the existing one.
>
> Either way: Okay, and thank you!

Committed, thanks.


Re: [PATCH,WWWDOCS] htdocs: add an Atom feed for GCC news

2023-01-11 Thread Jose E. Marchesi via Gcc-patches


>> Hi!
>>
>> On 2022-12-23T10:50:13+0100, "Jose E. Marchesi via Gcc-patches" 
>>  wrote:
>>> This patch adds an Atom feed for GCC news, which can then be easily
>>> aggregated in other sites, such as the GNU planet
>>> (https://planet.gnu.org).
>>>
>>> The feed lives in a file news.xml, and this patch initializes it with
>>> the latest entry in News as an example.
>>
>> I absolutely agree that providing such an RSS feed is a good thing
>> (..., and that we generally should make better use of our News section,
>> and other "PR"...) -- but I'm less convinced by the prospect of manually
>> editing the RSS 'news.xml' file, duplicating in a (potentially) different
>> format what we've got in the HTML News section.  :-|
>>
>> Ideally, there'd be some simple files for News items (Markdown, or
>> similar), which are then converted into HTML News as well as RSS feed.
>> Obviously, there needs to be some consensus on what to use, and somebody
>> needs to set up the corresponding machinery...
>>
>> Or do others think that manual 'news.xml' maintenance is not so bad (for
>> now)?
>
> I would like to point out that I have maintained these kinds of feeds for
> my own sites for years, and that in my humble personal experience unless
> there are a lot of updates, like more than a couple of new entries per
> month, any automated schema would be overkill, prone to rot, and not
> really worth the effort.
>
> I strongly suggest to not overengineer here [and nowhere else :)]

I forgot to mention that it is also useful to have fine-grain control of
what you publish on what feed.

Not all the news may be appropriate for all feeds.  For example, I have
a separate feed on my site for entries I want to aggregate in the GNU
Planet.  Other stuff, which is more personal in nature, is included in a
more general feed, or not included in a feed at all.

Not sure if this really applies to the case at hand, which is the GCC
News, but that is another reason why I maintain my feeds manually as
proposed in the patch.

>
>>
>> Grüße
>>  Thomas
>>
>>
>>> ---
>>>  htdocs/index.html |  9 -
>>>  htdocs/news.xml   | 28 
>>>  2 files changed, 36 insertions(+), 1 deletion(-)
>>>  create mode 100644 htdocs/news.xml
>>>
>>> diff --git a/htdocs/index.html b/htdocs/index.html
>>> index e91fadf1..2ddee6f6 100644
>>> --- a/htdocs/index.html
>>> +++ b/htdocs/index.html
>>> @@ -6,6 +6,9 @@
>>>  >> content="FUv_3eEIkimd6LAoWned4TPMqmKKQmw3aA2_PBJ5SAY">
>>>  GCC, the GNU Compiler Collection
>>>  https://gcc.gnu.org/gcc.css;>
>>> +>> +  title="News about the GNU Compiler Collection"
>>> +  href="news.xml"/>
>>>  
>>>
>>>  
>>> @@ -48,7 +51,11 @@ mission statement.
>>>
>>>  
>>>
>>>  
>>> diff --git a/htdocs/news.xml b/htdocs/news.xml
>>> new file mode 100644
>>> index ..bebcaa66
>>> --- /dev/null
>>> +++ b/htdocs/news.xml
>>> @@ -0,0 +1,28 @@
>>> +
>>> +
>>> +
>>> +  
>>> +News about the GNU Compiler Collection
>>> +https://gcc.gnu.org
>>> +
>>> +  The GNU Compiler Collection includes front ends for C, C++,
>>> +  Objective-C, Fortran, Ada, Go, and D, as well as libraries for
>>> +  these languages (libstdc++,...). GCC was originally written as
>>> +  the compiler for the GNU operating system. The GNU system was
>>> +  developed to be 100% free software, free in the sense that it
>>> +  respects the user's freedom.
>>> +
>>> +
>>> +
>>> +  GCC BPF in Compiler Explorer
>>> +  https://godbolt.org
>>> +  
>>> +Support for a nightly build of the bpf-unknown-none-gcc
>>> +compiler has been contributed to Compiler Explorer (aka
>>> +godbolt.org) by Marc Poulhiès
>>> +  
>>> +  Fri, 23 December 2022 11:00:00 CET
>>> +
>>> +
>>> +  
>>> +
>>> --
>>> 2.30.2
>> -
>> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße
>> 201, 80634 München; Gesellschaft mit beschränkter Haftung;
>> Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft:
>> München; Registergericht München, HRB 106955


Re: [PATCH,WWWDOCS] htdocs: add an Atom feed for GCC news

2023-01-11 Thread Jose E. Marchesi via Gcc-patches


> Hi!
>
> On 2022-12-23T10:50:13+0100, "Jose E. Marchesi via Gcc-patches" 
>  wrote:
>> This patch adds an Atom feed for GCC news, which can then be easily
>> aggregated in other sites, such as the GNU planet
>> (https://planet.gnu.org).
>>
>> The feed lives in a file news.xml, and this patch initializes it with
>> the latest entry in News as an example.
>
> I absolutely agree that providing such an RSS feed is a good thing
> (..., and that we generally should make better use of our News section,
> and other "PR"...) -- but I'm less convinced by the prospect of manually
> editing the RSS 'news.xml' file, duplicating in a (potentially) different
> format what we've got in the HTML News section.  :-|
>
> Ideally, there'd be some simple files for News items (Markdown, or
> similar), which are then converted into HTML News as well as RSS feed.
> Obviously, there needs to be some consensus on what to use, and somebody
> needs to set up the corresponding machinery...
>
> Or do others think that manual 'news.xml' maintenance is not so bad (for
> now)?

I would like to point out that I have maintained these kinds of feeds for
my own sites for years, and that in my humble personal experience unless
there are a lot of updates, like more than a couple of new entries per
month, any automated schema would be overkill, prone to rot, and not
really worth the effort.

I strongly suggest to not overengineer here [and nowhere else :)]

>
> Grüße
>  Thomas
>
>
>> ---
>>  htdocs/index.html |  9 -
>>  htdocs/news.xml   | 28 
>>  2 files changed, 36 insertions(+), 1 deletion(-)
>>  create mode 100644 htdocs/news.xml
>>
>> diff --git a/htdocs/index.html b/htdocs/index.html
>> index e91fadf1..2ddee6f6 100644
>> --- a/htdocs/index.html
>> +++ b/htdocs/index.html
>> @@ -6,6 +6,9 @@
>>  > content="FUv_3eEIkimd6LAoWned4TPMqmKKQmw3aA2_PBJ5SAY">
>>  GCC, the GNU Compiler Collection
>>  https://gcc.gnu.org/gcc.css;>
>> +> +  title="News about the GNU Compiler Collection"
>> +  href="news.xml"/>
>>  
>>
>>  
>> @@ -48,7 +51,11 @@ mission statement.
>>
>>  
>>
>>  
>> diff --git a/htdocs/news.xml b/htdocs/news.xml
>> new file mode 100644
>> index ..bebcaa66
>> --- /dev/null
>> +++ b/htdocs/news.xml
>> @@ -0,0 +1,28 @@
>> +
>> +
>> +
>> +  
>> +News about the GNU Compiler Collection
>> +https://gcc.gnu.org
>> +
>> +  The GNU Compiler Collection includes front ends for C, C++,
>> +  Objective-C, Fortran, Ada, Go, and D, as well as libraries for
>> +  these languages (libstdc++,...). GCC was originally written as
>> +  the compiler for the GNU operating system. The GNU system was
>> +  developed to be 100% free software, free in the sense that it
>> +  respects the user's freedom.
>> +
>> +
>> +
>> +  GCC BPF in Compiler Explorer
>> +  https://godbolt.org
>> +  
>> +Support for a nightly build of the bpf-unknown-none-gcc
>> +compiler has been contributed to Compiler Explorer (aka
>> +godbolt.org) by Marc Poulhiès
>> +  
>> +  Fri, 23 December 2022 11:00:00 CET
>> +
>> +
>> +  
>> +
>> --
>> 2.30.2
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße
> 201, 80634 München; Gesellschaft mit beschränkter Haftung;
> Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft:
> München; Registergericht München, HRB 106955


Re: [PATCH v2] bpf: correct bpf_print_operand for floats [PR108293]

2023-01-10 Thread Jose E. Marchesi via Gcc-patches


> Hi Jose,
>
> As we discussed on IRC, since we don't currently define
> TARGET_SUPPORTS_WIDE_INT it is safer to keep the handling for VOIDmode
> CONST_DOUBLEs. My current understanding is that it may be needed if the
> host is a 32-bit platform.
>
> I also added a gcc_unreachable () as you pointed out. V2 below.
> Tested with bpf-unknown-none on x86_64 host, no known regressions.
>
> WDYT?

OK for master.
Thanks!

>
> Thanks,
> David
>
> ---
>
> [Changes from v1:
>  - Keep handling for VOIDmode CONST_DOUBLE, just in case.
>  - Add a gcc_unreachable () if `op` is none of VOIDmode, SFmode,
>nor DFmode. ]
>
> The existing logic in bpf_print_operand was only correct for integral
> CONST_DOUBLEs, and emitted garbage for floating point modes. Fix it so
> floating point mode operands are correctly handled.
>
>   PR target/108293
>
> gcc/
>
>   * config/bpf/bpf.cc (bpf_print_operand): Correct handling for
>   floating point modes.
>
> gcc/testsuite/
>
>   * gcc.target/bpf/double-1.c: New test.
>   * gcc.target/bpf/double-2.c: New test.
>   * gcc.target/bpf/float-1.c: New test.
> ---
>  gcc/config/bpf/bpf.cc   | 34 -
>  gcc/testsuite/gcc.target/bpf/double-1.c | 12 +
>  gcc/testsuite/gcc.target/bpf/double-2.c | 12 +
>  gcc/testsuite/gcc.target/bpf/float-1.c  | 12 +
>  4 files changed, 64 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/double-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/double-2.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/float-1.c
>
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index 2aeaeaf309b..576a1fe8eab 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -880,13 +880,35 @@ bpf_print_operand (FILE *file, rtx op, int code 
> ATTRIBUTE_UNUSED)
>output_address (GET_MODE (op), XEXP (op, 0));
>break;
>  case CONST_DOUBLE:
> -  if (CONST_DOUBLE_HIGH (op))
> - fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
> -  CONST_DOUBLE_HIGH (op), CONST_DOUBLE_LOW (op));
> -  else if (CONST_DOUBLE_LOW (op) < 0)
> - fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (op));
> +  if (GET_MODE (op) == VOIDmode)
> + {
> +   if (CONST_DOUBLE_HIGH (op))
> + fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
> +  CONST_DOUBLE_HIGH (op), CONST_DOUBLE_LOW (op));
> +   else if (CONST_DOUBLE_LOW (op) < 0)
> + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (op));
> +   else
> + fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (op));
> + }
>else
> - fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (op));
> + {
> +   long vals[2];
> +   real_to_target (vals, CONST_DOUBLE_REAL_VALUE (op), GET_MODE (op));
> +   vals[0] &= 0xffffffff;
> +   vals[1] &= 0xffffffff;
> +   if (GET_MODE (op) == SFmode)
> + fprintf (file, "0x%08lx", vals[0]);
> +   else if (GET_MODE (op) == DFmode)
> + {
> +   /* Note: real_to_target puts vals in target word order.  */
> +   if (WORDS_BIG_ENDIAN)
> + fprintf (file, "0x%08lx%08lx", vals[0], vals[1]);
> +   else
> + fprintf (file, "0x%08lx%08lx", vals[1], vals[0]);
> + }
> +   else
> + gcc_unreachable ();
> + }
>break;
>  default:
>output_addr_const (file, op);
> diff --git a/gcc/testsuite/gcc.target/bpf/double-1.c 
> b/gcc/testsuite/gcc.target/bpf/double-1.c
> new file mode 100644
> index 000..200f1bd18f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/double-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mlittle-endian" } */
> +
> +double f;
> +double a() { f = 1.0; return 1.0; }
> +double b() { f = 2.0; return 2.0; }
> +double c() { f = 2.0; return 3.0; }
> +double d() { f = 3.0; return 3.0; }
> +
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x3ff0" 2 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4000" 3 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4008" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/double-2.c 
> b/gcc/testsuite/gcc.target/bpf/double-2.c
> new file mode 100644
> index 000..d04ddd0c575
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/double-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mbig-endian" } */
> +
> +double f;
> +double a() { f = 1.0; return 1.0; }
> +double b() { f = 2.0; return 2.0; }
> +double c() { f = 2.0; return 3.0; }
> +double d() { f = 3.0; return 3.0; }
> +
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x3ff0" 2 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4000" 3 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4008" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/float-1.c 
> 

Re: [PATCH] bpf: correct bpf_print_operand for floats [PR108293]

2023-01-10 Thread Jose E. Marchesi via Gcc-patches


Hi David.
Thanks for the patch.

> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index 2aeaeaf309b..9dde3944e9c 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -880,13 +880,20 @@ bpf_print_operand (FILE *file, rtx op, int code 
> ATTRIBUTE_UNUSED)
>output_address (GET_MODE (op), XEXP (op, 0));
>break;
>  case CONST_DOUBLE:
> -  if (CONST_DOUBLE_HIGH (op))
> - fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
> -  CONST_DOUBLE_HIGH (op), CONST_DOUBLE_LOW (op));
> -  else if (CONST_DOUBLE_LOW (op) < 0)
> - fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (op));
> -  else
> - fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (op));
> +  long vals[2];
> +  real_to_target (vals, CONST_DOUBLE_REAL_VALUE (op), GET_MODE (op));
> +  vals[0] &= 0xffffffff;
> +  vals[1] &= 0xffffffff;
> +  if (GET_MODE (op) == SFmode)
> + fprintf (file, "0x%08lx", vals[0]);
> +  else if (GET_MODE (op) == DFmode)
> + {
> +   /* Note: real_to_target puts vals in target word order.  */
> +   if (WORDS_BIG_ENDIAN)
> + fprintf (file, "0x%08lx%08lx", vals[0], vals[1]);
> +   else
> + fprintf (file, "0x%08lx%08lx", vals[1], vals[0]);
> + }
>break;
>  default:
>output_addr_const (file, op);

Do we want a gcc_unreachable in case the mode of `op' is neither SFmode
nor DFmode?

> diff --git a/gcc/testsuite/gcc.target/bpf/double-1.c 
> b/gcc/testsuite/gcc.target/bpf/double-1.c
> new file mode 100644
> index 000..200f1bd18f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/double-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mlittle-endian" } */
> +
> +double f;
> +double a() { f = 1.0; return 1.0; }
> +double b() { f = 2.0; return 2.0; }
> +double c() { f = 2.0; return 3.0; }
> +double d() { f = 3.0; return 3.0; }
> +
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x3ff0" 2 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4000" 3 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4008" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/double-2.c 
> b/gcc/testsuite/gcc.target/bpf/double-2.c
> new file mode 100644
> index 000..d04ddd0c575
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/double-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mbig-endian" } */
> +
> +double f;
> +double a() { f = 1.0; return 1.0; }
> +double b() { f = 2.0; return 2.0; }
> +double c() { f = 2.0; return 3.0; }
> +double d() { f = 3.0; return 3.0; }
> +
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x3ff0" 2 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4000" 3 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4008" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/bpf/float-1.c 
> b/gcc/testsuite/gcc.target/bpf/float-1.c
> new file mode 100644
> index 000..05ed7bb651d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/float-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mlittle-endian" } */
> +
> +float f;
> +float a() { f = 1.0; return 1.0; }
> +float b() { f = 2.0; return 2.0; }
> +float c() { f = 2.0; return 3.0; }
> +float d() { f = 3.0; return 3.0; }
> +
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x3f80" 2 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4000" 3 } } */
> +/* { dg-final { scan-assembler-times "lddw\t%r.,0x4040" 3 } } */


Re: [PATCH] expr.cc: avoid unexpected side effects in expand_expr_divmod optimization

2023-01-04 Thread Jose E. Marchesi via Gcc-patches


ping.
Would this be a good approach for fixing the issue?

> Hi Jakub.
>
>> On Thu, Dec 08, 2022 at 02:02:36PM +0100, Jose E. Marchesi wrote:
>>> So, I guess the right fix would be to call assemble_external_libcall
>>> during final?  The `.global FOO' directive would be generated
>>> immediately before the call sequence, but I guess that would be ok.
>>
>> During final only if all the targets can deal with the effects of
>> assemble_external_libcall being done in the middle of emitting assembly
>> for the function.
>>
>> Otherwise, it could be e.g. done in the first loop of shorten_branches.
>>
>> Note, in calls.cc it is done only for emit_library_call_value_1
>> and not for emit_call_1, so if we do it late, we need to be able to find
>> out what call is to a libcall and what is to a normal call.  If there is
>> no way to differentiate it right now, perhaps we need some flag somewhere,
>> say on a SYMBOL_REF.  And then assemble_external_libcall either only
>> if such a SYMBOL_REF appears in CALL_INSN or sibcall JUMP_INSN, or
>> perhaps anywhere in the function and its constant pool.
>
> All right, the quick-and-dirty patch below seems to DTRT with simple
> examples.
>
> First, when libcalls are generated.  Note only one .global is generated
> for all calls, and actually it is at around the same position as before:
>
>   $ cat foo.c
>   int foo(unsigned int len, int flag)
>   {
> if (flag)
>   return (((long)len) * 234 / 5);
> return (((long)len) * 2 / 5);
>   }
>   $ cc1 -O2 foo.c
>   $ cat foo.s
>   .file   "foo.c"
>   .text
>   .global __divdi3
>   .align  3
>   .global foo
>   .type   foo, @function
>   foo:
>   mov32   %r1,%r1
>   lsh %r2,32
>   jne %r2,0,.L5
>   mov %r2,5
>   lsh %r1,1
>   call__divdi3
>   lsh %r0,32
>   arsh%r0,32
>   exit
>   .L5:
>   mov %r2,5
>   mul %r1,234
>   call__divdi3
>   lsh %r0,32
>   arsh%r0,32
>   exit
>   .size   foo, .-foo
>   .ident  "GCC: (GNU) 13.0.0 20221207 (experimental)"
>
> Second, when libcalls are tried by expand_divmod in a sequence, but then
> discarded and not linked in the main sequence:
>
>   $ cat foo.c
>   int foo(unsigned int len, int flag)
>   {
> if (flag)
>   return (((long)len) * 234 / 5);
> return (((long)len) * 2 / 5);
>   }
>   $ cc1 -O2 foo.c
>   $ cat foo.s
>   .file   "foo.c"
>   .text
>   .align  3
>   .global foo
>   .type   foo, @function
>   foo:
>   mov32   %r0,%r1
>   lsh %r2,32
>   jne %r2,0,.L5
>   add %r0,%r0
>   div %r0,5
>   lsh %r0,32
>   arsh%r0,32
>   exit
>   .L5:
>   mul %r0,234
>   div %r0,5
>   lsh %r0,32
>   arsh%r0,32
>   exit
>   .size   foo, .-foo
>   .ident  "GCC: (GNU) 13.0.0 20221207 (experimental)"
>
> Note the .global now is not generated, as desired.
>
> As you can see below, I am adding a new RTX flag `is_libcall', with
> written form "/l".
>
> Before I get into serious testing etc, can you please confirm whether
> this is the right approach or not?
>
> In particular, I am a little bit concerned about the expectation I am
> using that the target of the `call' instruction emitted by emit_call_1
> is always a (MEM (SYMBOL_REF ...)) when it is passed a SYMBOL_REF as the
> first argument (`fun' in emit_library_call_value_1).
>
> Thanks.
>
> diff --git a/gcc/calls.cc b/gcc/calls.cc
> index 6dd6f73e978..6c4a3725272 100644
> --- a/gcc/calls.cc
> +++ b/gcc/calls.cc
> @@ -4370,10 +4370,6 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
> value,
>   || argvec[i].partial != 0)
>update_stack_alignment_for_call ([i].locate);
>  
> -  /* If this machine requires an external definition for library
> - functions, write one out.  */
> -  assemble_external_libcall (fun);
> -
>original_args_size = args_size;
>args_size.constant = (aligned_upper_bound (args_size.constant
>+ stack_pointer_delta,
> @@ -4717,6 +4713,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
> value,
>  valreg,
>  old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
>  
> +  /* Mark the emitted call as a libcall with the new flag.  */
> +  RTL_LIBCALL_P (last_call_insn ()) = 1;
> +
>if (flag_ipa_ra)
>  {
>rtx datum = orgfun;
> diff --git a/gcc/final.cc b/gcc/final.cc
> index eea572238f6..df57de5afd0 100644
> --- a/gcc/final.cc
> +++ b/gcc/final.cc
> @@ -815,6 +815,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
> reorg.cc, since the branch splitting exposes new instructions with delay
> slots.  */
>  
> +static rtx call_from_call_insn (rtx_call_insn *insn);
> +
>  void
>  shorten_branches (rtx_insn *first)
>  {
> @@ -850,6 +852,24 @@ shorten_branches (rtx_insn *first)
>for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN 

Re: [PATCH V2] Disable sched1 in functions that call setjmp

2022-12-24 Thread Jose E. Marchesi via Gcc-patches


>> Am 24.12.2022 um 09:11 schrieb Alexander Monakov via Gcc-patches 
>> :
>> 
>> 
>>> On Fri, 23 Dec 2022, Qing Zhao wrote:
>>> 
>>> BTW, Why sched1 is not enabled on x86 by default?
>> 
>> Register allocation is tricky on x86 due to small number of general-purpose
>> registers, and sched1 can make it even more difficult. I think before 
>> register
>> pressure modeling was added, sched1 could not be enabled because then 
>> allocation
>> would sometimes fail, and now there's no incentive to enable it, as it is 
>> not so
>> important for modern x86 CPUs. Perhaps someone else has a more comprehensive
>> answer.
>> 
>>> Another question is:  As discussed in the original bug PR57067:
>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57067 The root cause of this
>>> issue related to the abnormal control flow edges (from setjmp/longjmp) 
>>> cannot
>>> be represented correctly at RTL stage, shall we fix this root cause 
>>> instead? 
>> 
>> You'd need an experienced reviewer to work with you, especially on high-level
>> design decisions such as "How ABNORMAL_DISPATCHER should be represented on 
>> RTL".
>> I'm afraid it's not just a matter of applying a small patch in one place.
>
> For nonlocal goto we thread the abnormal dispatcher.  Of course by
> regenerating abnormal edges, not by keeping and modifying them.  We
> cannot re-generate the (optimal) set of abnormal edges for setjmp so
> we want to preserve those edges.  But as you say it’s a very
> non-trivial change.

All right, so we have two short-term alternatives to at least remove the
possibility that GCC generates wrong code for valid C when the scheduler
is turned on:

a) To disable sched1 in functions that call setjmp.

b) To change deps_analyze_insn so instructions are not moved across
   function calls before register allocation (!reload_completed).

Both patches fix our particular use cases and are regression free in
aarch64-linux-gnu.

However, there is something I don't understand: wouldn't sched2
introduce the same problem when -fsched2-use-superblocks is specified?
In that case, the option a) would need to be expanded to disable sched2
as well, and b) wouldn't have effect (!after_reload)?

Using -fsched2-use-superblocks doesn't trigger the problem in our use
case.


Re: [PATCH V2] Disable sched1 in functions that call setjmp

2022-12-23 Thread Jose E. Marchesi via Gcc-patches


> On Fri, 23 Dec 2022, Jose E. Marchesi wrote:
>
>> > (scheduling across calls in sched2 is somewhat dubious as well, but
>> > it doesn't risk register pressure issues, and on VLIW CPUs it at least
>> > can result in better VLIW packing)
>> 
>> Does sched2 actually schedule across calls?  All the comments in the
>> source code stress the fact that the second scheduler pass (after
>> register allocation) works in regions that correspond to basic blocks:
>> "(after reload, each region is of one block)".
>
> A call instruction does not end a basic block.

Ok, so my original assumption in the patch explaining why I disabled
sched1 but not sched2 was not correct.  Good to know.

> (also, with -fsched2-use-superblocks sched2 works on regions like sched1)
>
> Alexander


Re: [PATCH V2] Disable sched1 in functions that call setjmp

2022-12-23 Thread Jose E. Marchesi via Gcc-patches


> On Fri, 23 Dec 2022, Qing Zhao wrote:
>> >> I am a little confused, you mean pre-RA scheduler does not look at the 
>> >> data flow
>> >> information at all when scheduling insns across calls currently?
>> > 
>> > I think it does not inspect liveness info, and may extend lifetime of a 
>> > pseudo
>> > across a call, transforming
>> > 
>> >  call foo
>> >  reg = 1
>> >  ...
>> >  use reg
>> > 
>> > to
>> > 
>> >  reg = 1
>> >  call foo
>> >  ...
>> >  use reg
>> > 
>> > but this is undesirable, because now register allocation cannot select a
>> > call-clobbered register for 'reg’.
>> Okay, thanks for the explanation.
>> 
>> Then, why not just check the liveness info instead of inhibiting all 
>> scheduling across calls?
>
> Because there's almost nothing to gain from pre-RA scheduling across calls in
> the first place. Remember that the call transfers control flow elsewhere and
> therefore the scheduler has no idea about the pipeline state after the call
> and after the return, so modeling-wise it's a gamble.
>
> For instructions that lie on a critical path such scheduling can be useful 
> when
> it substantially reduces the difference between the priority of the call and
> nearby instructions of the critical path. But we don't track which 
> instructions
> are on critical path(s) and which are not.
>
> (scheduling across calls in sched2 is somewhat dubious as well, but
> it doesn't risk register pressure issues, and on VLIW CPUs it at least
> can result in better VLIW packing)

Does sched2 actually schedule across calls?  All the comments in the
source code stress the fact that the second scheduler pass (after
register allocation) works in regions that correspond to basic blocks:
"(after reload, each region is of one block)".


Re: [PATCH V2] Disable sched1 in functions that call setjmp

2022-12-23 Thread Jose E. Marchesi via Gcc-patches


> Alexander Monakov via Gcc-patches  writes:
>> On Thu, 22 Dec 2022, Jose E. Marchesi via Gcc-patches wrote:
>>
>>> The first instruction scheduler pass reorders instructions in the TRY
>>> block in a way `b=true' gets executed before the call to the function
>>> `f'.  This optimization is wrong, because `main' calls setjmp and `f'
>>> is known to call longjmp.
>>> 
>>> As discussed in BZ 57067, the root cause for this is the fact that
>>> setjmp is not properly modeled in RTL, and therefore the backend
>>> passes have no normalized way to handle this situation.
>>> 
>>> As Alexander Monakov noted in the BZ, many RTL passes refuse to touch
>>> functions that call setjmp.  This includes for example gcse,
>>> store_motion and cprop.  This patch adds the sched1 pass to that list.
>>> 
>>> Note that the other instruction scheduling passes are still allowed to
>>> run on these functions, since they reorder instructions within basic
>>> blocks, and therefore they cannot cross function calls.
>>> 
>>> This doesn't fix the fundamental issue, but at least ensures that
>>> sched1 won't perform invalid transformations in correct C programs.
>>
>> I think scheduling across calls in the pre-RA scheduler is simply an 
>> oversight,
>> we do not look at dataflow information and with 50% chance risk extending
>> lifetime of a pseudoregister across a call, causing higher register pressure 
>> at
>> the point of the call, and potentially an extra spill.
>>
>> Therefore I would suggest to indeed solve the root cause, with (untested):
>>
>> diff --git a/gcc/sched-deps.cc b/gcc/sched-deps.cc
>> index 948aa0c3b..343fe2bfa 100644
>> --- a/gcc/sched-deps.cc
>> +++ b/gcc/sched-deps.cc
>> @@ -3688,7 +3688,13 @@ deps_analyze_insn (class deps_desc *deps, rtx_insn 
>> *insn)
>>
>>CANT_MOVE (insn) = 1;
>>
>> -  if (find_reg_note (insn, REG_SETJMP, NULL))
>> +  if (!reload_completed)
>> +   {
>> + /* Do not schedule across calls, this is prone to extending 
>> lifetime
>> +of a pseudo and causing extra spill later on.  */
>> + reg_pending_barrier = MOVE_BARRIER;
>> +   }
>> +  else if (find_reg_note (insn, REG_SETJMP, NULL))
>>  {
>>/* This is setjmp.  Assume that all registers, not just
>>   hard registers, may be clobbered by this call.  */
>
> +1 for trying this FWIW.  There's still plenty of time to try an
> alternative solution if there are unexpected performance problems.

Let me see if Alexander's patch fixes the issue at hand (it must); I
will also do some regression testing.


[PATCH,WWWDOCS] htdocs: rotate news

2022-12-23 Thread Jose E. Marchesi via Gcc-patches
---
 htdocs/index.html | 24 
 htdocs/news.html  | 24 
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/htdocs/index.html b/htdocs/index.html
index 2ddee6f6..2ab65a95 100644
--- a/htdocs/index.html
+++ b/htdocs/index.html
@@ -92,30 +92,6 @@ mission statement.
 [2022-04-21]
 
 
-https://gcc.gnu.org/wiki/linuxplumbers2021;>GNU Tools @ 
Linux Plumbers Conference 2021
-[2021-09-15]
-Will be held online, September 20-24 2021
-
-GCC 11.2 released
-[2021-07-28]
-
-
-GCC 9.4 released
-[2021-06-01]
-
-
-GCC 8.5 released
-[2021-05-14]
-
-
-GCC 11.1 released
-[2021-04-27]
-
-
-GCC 10.3 released
-[2021-04-08]
-
-
 
 
 
diff --git a/htdocs/news.html b/htdocs/news.html
index e1384852..2a8b7feb 100644
--- a/htdocs/news.html
+++ b/htdocs/news.html
@@ -17,6 +17,30 @@
 
 
 
+https://gcc.gnu.org/wiki/linuxplumbers2021;>GNU Tools @ 
Linux Plumbers Conference 2021
+[2021-09-15]
+Will be held online, September 20-24 2021
+
+GCC 11.2 released
+[2021-07-28]
+
+
+GCC 9.4 released
+[2021-06-01]
+
+
+GCC 8.5 released
+[2021-05-14]
+
+
+GCC 11.1 released
+[2021-04-27]
+
+
+GCC 10.3 released
+[2021-04-08]
+
+
 GCC 10.2 released
 [2020-07-23]
 
-- 
2.30.2



[PATCH,WWWDOCS] htdocs: add an Atom feed for GCC news

2022-12-23 Thread Jose E. Marchesi via Gcc-patches
This patch adds an Atom feed for GCC news, which can then be easily
aggregated in other sites, such as the GNU planet
(https://planet.gnu.org).

The feed lives in a file news.xml, and this patch initializes it with
the latest entry in News as an example.
---
 htdocs/index.html |  9 -
 htdocs/news.xml   | 28 
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 htdocs/news.xml

diff --git a/htdocs/index.html b/htdocs/index.html
index e91fadf1..2ddee6f6 100644
--- a/htdocs/index.html
+++ b/htdocs/index.html
@@ -6,6 +6,9 @@
 
 GCC, the GNU Compiler Collection
 https://gcc.gnu.org/gcc.css;>
+
 
 
 
@@ -48,7 +51,11 @@ mission statement.
 
 
 
 
diff --git a/htdocs/news.xml b/htdocs/news.xml
new file mode 100644
index ..bebcaa66
--- /dev/null
+++ b/htdocs/news.xml
@@ -0,0 +1,28 @@
+
+
+
+  
+News about the GNU Compiler Collection
+https://gcc.gnu.org
+
+  The GNU Compiler Collection includes front ends for C, C++,
+  Objective-C, Fortran, Ada, Go, and D, as well as libraries for
+  these languages (libstdc++,...). GCC was originally written as
+  the compiler for the GNU operating system. The GNU system was
+  developed to be 100% free software, free in the sense that it
+  respects the user's freedom.
+
+
+
+  GCC BPF in Compiler Explorer
+  https://godbolt.org
+  
+Support for a nightly build of the bpf-unknown-none-gcc
+compiler has been contributed to Compiler Explorer (aka
+godbolt.org) by Marc Poulhiès
+  
+  Fri, 23 December 2022 11:00:00 CET
+
+
+  
+
-- 
2.30.2



[PATCH,WWWDOCS] htdocs: news: GCC BPF in Compiler Explorer

2022-12-23 Thread Jose E. Marchesi via Gcc-patches
This patch adds an entry to the News section in index.html, announcing
the availability of a nightly build of bpf-unknown-none-gcc.
---
 htdocs/index.html | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/htdocs/index.html b/htdocs/index.html
index 655b7373..e91fadf1 100644
--- a/htdocs/index.html
+++ b/htdocs/index.html
@@ -55,6 +55,12 @@ mission statement.
 News
 
 
+https://godbolt.org;>GCC BPF in Compiler Explorer
+ [2022-12-23]
+Support for a nightly build of the bpf-unknown-none-gcc compiler
+  has been contributed to Compiler Explorer (aka godbolt.org) by Marc
+  Poulhiès
+
 https://gcc.gnu.org/wiki/cauldron2022;>GNU Tools Cauldron 
2022
 [2022-09-02]
 Prague, Czech Republic and online, September 16-18 2022
-- 
2.30.2



[PATCH V2] Disable sched1 in functions that call setjmp

2022-12-22 Thread Jose E. Marchesi via Gcc-patches
When the following testcase is built with -fschedule-insns in either
x86_64 or aarch64:

  #include <stdio.h>
  #include <setjmp.h>
  #include <stdbool.h>

  jmp_buf ex_buf__;

   #define TRY do{ if( !setjmp(ex_buf__) ){
   #define CATCH } else {
   #define ETRY } }while(0)
   #define THROW longjmp(ex_buf__, 1)

  int f(int x)
  {
int arr[] = {1,2,6,8,9,10};
int lo=0;
int hi=5;

while(lo<=hi) {
  int mid=(lo+hi)/2;

  if(arr[mid]==x) {
THROW;
      } else if(arr[mid]<x) {
        lo=mid+1;
      } else if(arr[mid]>x) {
hi=mid-1;
  }
}

return -1;
  }

  int
  main(int argc, char** argv)
  {
int a=2;
bool b=false;

TRY
{
 a=f(a);
 b=true;
}
CATCH
{
 printf("a : %d\n",a);
 printf("Got Exception!\n");
}
ETRY;

if(b) {
  printf("b is true!\n");
}
return 0;
  }

The first instruction scheduler pass reorders instructions in the TRY
block in a way `b=true' gets executed before the call to the function
`f'.  This optimization is wrong, because `main' calls setjmp and `f'
is known to call longjmp.

As discussed in BZ 57067, the root cause for this is the fact that
setjmp is not properly modeled in RTL, and therefore the backend
passes have no normalized way to handle this situation.

As Alexander Monakov noted in the BZ, many RTL passes refuse to touch
functions that call setjmp.  This includes for example gcse,
store_motion and cprop.  This patch adds the sched1 pass to that list.

Note that the other instruction scheduling passes are still allowed to
run on these functions, since they reorder instructions within basic
blocks, and therefore they cannot cross function calls.

This doesn't fix the fundamental issue, but at least ensures that
sched1 won't perform invalid transformations in correct C programs.

regtested in aarch64-linux-gnu.

gcc/ChangeLog:

PR rtl-optimization/57067
* sched-rgn.cc (pass_sched::gate): Disable pass if current
function calls setjmp.
---
 gcc/sched-rgn.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
index 420c45dffb4..c536d0b8dea 100644
--- a/gcc/sched-rgn.cc
+++ b/gcc/sched-rgn.cc
@@ -3847,7 +3847,8 @@ bool
 pass_sched::gate (function *)
 {
 #ifdef INSN_SCHEDULING
-  return optimize > 0 && flag_schedule_insns && dbg_cnt (sched_func);
+  return optimize > 0 && flag_schedule_insns
+&& !cfun->calls_setjmp && dbg_cnt (sched_func);
 #else
   return 0;
 #endif
-- 
2.30.2



[PATCH] Disable sched1 in functions that call setjmp

2022-12-22 Thread Jose E. Marchesi via Gcc-patches
When the following testcase is built with -fschedule-insns in either
x86_64 or aarch64:



jmp_buf ex_buf__;

int f(int x)
{
  int arr[] = {1,2,6,8,9,10};
  int lo=0;
  int hi=5;

  while(lo<=hi) {
int mid=(lo+hi)/2;

if(arr[mid]==x) {
  THROW;
} else if(arr[mid]<x) {
  lo=mid+1;
} else if(arr[mid]>x) {
  hi=mid-1;
}
  }

  return -1;
}

int
main(int argc, char** argv)
{
  int a=2;
  bool b=false;

  TRY
  {
   a=f(a);
   b=true;
  }
  CATCH
  {
   printf("a : %d\n",a);
   printf("Got Exception!\n");
  }
  ETRY;

  if(b) {
printf("b is true!\n");
  }
  return 0;
}


The first instruction scheduler pass reorders instructions in the TRY
block in a way `b=true' gets executed before the call to the function
`f'.  This optimization is wrong, because `main' calls setjmp and `f'
is known to call longjmp.

As discussed in BZ 57067, the root cause for this is the fact that
setjmp is not properly modeled in RTL, and therefore the backend
passes have no normalized way to handle this situation.

As Alexander Monakov noted in the BZ, many RTL passes refuse to touch
functions that call setjmp.  This includes for example gcse,
store_motion and cprop.  This patch adds the sched1 pass to that list.

Note that the other instruction scheduling passes are still allowed to
run on these functions, since they reorder instructions within basic
blocks, and therefore they cannot cross function calls.

This doesn't fix the fundamental issue, but at least ensures that
sched1 won't perform invalid transformations in correct C programs.

regtested in aarch64-linux-gnu.

gcc/ChangeLog:

PR rtl-optimization/57067
* sched-rgn.cc (pass_sched::gate): Disable pass if current
function calls setjmp.
---
 gcc/sched-rgn.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
index 420c45dffb4..c536d0b8dea 100644
--- a/gcc/sched-rgn.cc
+++ b/gcc/sched-rgn.cc
@@ -3847,7 +3847,8 @@ bool
 pass_sched::gate (function *)
 {
 #ifdef INSN_SCHEDULING
-  return optimize > 0 && flag_schedule_insns && dbg_cnt (sched_func);
+  return optimize > 0 && flag_schedule_insns
+&& !cfun->calls_setjmp && dbg_cnt (sched_func);
 #else
   return 0;
 #endif
-- 
2.30.2



Re: [PATCH] expr.cc: avoid unexpected side effects in expand_expr_divmod optimization

2022-12-08 Thread Jose E. Marchesi via Gcc-patches


Hi Jakub.

> On Thu, Dec 08, 2022 at 02:02:36PM +0100, Jose E. Marchesi wrote:
>> So, I guess the right fix would be to call assemble_external_libcall
>> during final?  The `.global FOO' directive would be generated
>> immediately before the call sequence, but I guess that would be ok.
>
> During final only if all the targets can deal with the effects of
> assemble_external_libcall being done in the middle of emitting assembly
> for the function.
>
> Otherwise, it could be e.g. done in the first loop of shorten_branches.
>
> Note, in calls.cc it is done only for emit_library_call_value_1
> and not for emit_call_1, so if we do it late, we need to be able to find
> out what call is to a libcall and what is to a normal call.  If there is
> no way to differentiate it right now, perhaps we need some flag somewhere,
> say on a SYMBOL_REF.  And then assemble_external_libcall either only
> if such a SYMBOL_REF appears in CALL_INSN or sibcall JUMP_INSN, or
> perhaps anywhere in the function and its constant pool.

All right, the quick-and-dirty patch below seems to DTRT with simple
examples.

First, when libcalls are generated.  Note that only one .global is generated
for all calls, and it is actually at around the same position as before:

  $ cat foo.c
  int foo(unsigned int len, int flag)
  {
if (flag)
  return (((long)len) * 234 / 5);
return (((long)len) * 2 / 5);
  }
  $ cc1 -O2 foo.c
  $ cat foo.s
.file   "foo.c"
.text
.global __divdi3
.align  3
.global foo
.type   foo, @function
  foo:
mov32   %r1,%r1
lsh %r2,32
jne %r2,0,.L5
mov %r2,5
lsh %r1,1
call__divdi3
lsh %r0,32
arsh%r0,32
exit
  .L5:
mov %r2,5
mul %r1,234
call__divdi3
lsh %r0,32
arsh%r0,32
exit
.size   foo, .-foo
.ident  "GCC: (GNU) 13.0.0 20221207 (experimental)"

Second, when libcalls are tried by expand_divmod in a sequence, but then
discarded and not linked into the main sequence:

  $ cat foo.c
  int foo(unsigned int len, int flag)
  {
if (flag)
  return (((long)len) * 234 / 5);
return (((long)len) * 2 / 5);
  }
  $ cc1 -O2 foo.c
  $ cat foo.s
.file   "foo.c"
.text
.align  3
.global foo
.type   foo, @function
  foo:
mov32   %r0,%r1
lsh %r2,32
jne %r2,0,.L5
add %r0,%r0
div %r0,5
lsh %r0,32
arsh%r0,32
exit
  .L5:
mul %r0,234
div %r0,5
lsh %r0,32
arsh%r0,32
exit
.size   foo, .-foo
.ident  "GCC: (GNU) 13.0.0 20221207 (experimental)"

Note the .global now is not generated, as desired.

As you can see below, I am adding a new RTX flag `is_libcall', with
written form "/l".

Before I get into serious testing etc, can you please confirm whether
this is the right approach or not?

In particular, I am a little bit concerned about the expectation I am
using that the target of the `call' instruction emitted by emit_call_1
is always a (MEM (SYMBOL_REF ...)) when it is passed a SYMBOL_REF as the
first argument (`fun' in emit_library_call_value_1).

Thanks.

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 6dd6f73e978..6c4a3725272 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -4370,10 +4370,6 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
|| argvec[i].partial != 0)
   update_stack_alignment_for_call (&argvec[i].locate);
 
-  /* If this machine requires an external definition for library
- functions, write one out.  */
-  assemble_external_libcall (fun);
-
   original_args_size = args_size;
   args_size.constant = (aligned_upper_bound (args_size.constant
 + stack_pointer_delta,
@@ -4717,6 +4713,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
   valreg,
   old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
 
+  /* Mark the emitted call as a libcall with the new flag.  */
+  RTL_LIBCALL_P (last_call_insn ()) = 1;
+
   if (flag_ipa_ra)
 {
   rtx datum = orgfun;
diff --git a/gcc/final.cc b/gcc/final.cc
index eea572238f6..df57de5afd0 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -815,6 +815,8 @@ make_pass_compute_alignments (gcc::context *ctxt)
reorg.cc, since the branch splitting exposes new instructions with delay
slots.  */
 
+static rtx call_from_call_insn (rtx_call_insn *insn);
+
 void
 shorten_branches (rtx_insn *first)
 {
@@ -850,6 +852,24 @@ shorten_branches (rtx_insn *first)
   for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
 {
   INSN_SHUID (insn) = i++;
+
+  /* If this is a `call' instruction that implements a libcall,
+ and this machine requires an external definition for library
+ functions, write one out.  */
+  if (CALL_P 

Re: [PATCH] bpf: add define_insn for bswap

2022-12-08 Thread Jose E. Marchesi via Gcc-patches


Hi David.

> The eBPF architecture provides 'end[be,le]' instructions for endianness
> swapping. Add a define_insn for bswap<mode>2 to use them instead of
> falling back on a libcall.
>
> Tested on bpf-unknown-none, no known regressions.
>
> OK to commit?
> Thanks

OK for master.
Thanks!

> gcc/
>
>   * config/bpf/bpf.md (bswap2): New define_insn.
>
> gcc/testsuite/
>
>   * gcc.target/bpf/bswap-1.c: New test.
> ---
>  gcc/config/bpf/bpf.md  | 17 +
>  gcc/testsuite/gcc.target/bpf/bswap-1.c | 23 +++
>  2 files changed, 40 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/bswap-1.c
>
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index a28021aef26..22a133f1c79 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -341,6 +341,23 @@ (define_insn "lshr3"
>"rsh\t%0,%2"
>[(set_attr "type" "")])
>  
> + Endianness conversion
> +
> +(define_mode_iterator BSM [HI SI DI])
> +(define_mode_attr endmode [(HI "16") (SI "32") (DI "64")])
> +
> +(define_insn "bswap2"
> +  [(set (match_operand:BSM 0 "register_operand""=r")
> +(bswap:BSM (match_operand:BSM 1 "register_operand" " r")))]
> +  ""
> +{
> +  if (TARGET_BIG_ENDIAN)
> +return "endle\t%0, ";
> +  else
> +return "endbe\t%0, ";
> +}
> +  [(set_attr "type" "end")])
> +
>   Conditional branches
>  
>  ;; The eBPF jump instructions use 64-bit arithmetic when evaluating
> diff --git a/gcc/testsuite/gcc.target/bpf/bswap-1.c 
> b/gcc/testsuite/gcc.target/bpf/bswap-1.c
> new file mode 100644
> index 000..4748143ada5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/bswap-1.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mlittle-endian" } */
> +
> +unsigned short in16 = 0x1234U;
> +unsigned int   in32 = 0x12345678U;
> +unsigned long  in64 = 0x123456789abcdef0ULL;
> +
> +unsigned short out16 = 0;
> +unsigned int   out32 = 0;
> +unsigned long  out64 = 0;
> +
> +int foo (void)
> +{
> +  out16 = __builtin_bswap16 (in16);
> +  out32 = __builtin_bswap32 (in32);
> +  out64 = __builtin_bswap64 (in64);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler "endbe\t%r., 16" } } */
> +/* { dg-final { scan-assembler "endbe\t%r., 32" } } */
> +/* { dg-final { scan-assembler "endbe\t%r., 64" } } */


Re: [PATCH] expr.cc: avoid unexpected side effects in expand_expr_divmod optimization

2022-12-08 Thread Jose E. Marchesi via Gcc-patches


>> Am 08.12.2022 um 11:56 schrieb Jose E. Marchesi via Gcc-patches 
>> :
>> 
>> The expand_expr_divmod function in expr.cc attempts to optimize cases
>> where both arguments of a division/modulus are known to be positive
>> when interpreted as signed.  In these cases, both signed division and
>> unsigned division will raise the same value, and therefore the
>> cheapest option can be used.
>> 
>> In order to determine what is the cheaper option in the current
>> target, expand_expr_divmod actually expands both a signed divmod and
>> an unsigned divmod using local "sequences":
>> 
>>  start_sequence ();
>>  ...
>>  expand_divmod (... signed ...);
>>  ...
>>  end_sequence ();
>> 
>>  start_sequence ();
>>  ...
>>  expand_divmod (... unsigned ...);
>>  ...
>>  end_sequence ();
>> 
>> And then compares the cost of each generated sequence, choosing the
>> best one.  Finally, it emits the selected expanded sequence and
>> returns the rtx with the result.
>> 
>> This approach has a caveat.  Some targets do not provide instructions
>> for division/modulus instructions.  In the case of BPF, it provides
>> unsigned division/modulus, but not signed division/modulus.
>> 
>> In these cases, the expand_divmod tries can contain calls to funcalls.
>> For example, in BPF:
>> 
>>  start_sequence ();
>>  ...
>>  expand_divmod (... signed ...); -> This generates funcall to __divdi3
>>  ...
>>  end_sequence ();
>> 
>>  start_sequence ();
>>  ...
>>  expand_divmod (... unsigned ...); -> This generates direct `div' insn.
>>  ...
>>  end_sequence ();
>> 
>> The problem is that when a funcall is expanded, an accompanying global
>> symbol definition is written in the output stream:
>> 
>>  .global __divdi3
>> 
>> And this symbol definition remains in the compiled assembly file, even
>> if the sequence using the direct `div' instruction above is used.
>> 
>> This is particularly bad in BPF, because the kernel bpf loader chokes
>> on the spurious symbol __divdi3 and makes the resulting BPF object
>> unloadable (note that BPF objects are not linked before processed by
>> the kernel.)
>> 
>> In order to fix this, this patch modifies expand_expr_divmod in the
>> following way:
>> 
>> - When trying each sequence (signed, unsigned) the expand_divmod calls
>>  are told to _not_ use libcalls if everything else fails.  This is
>>  done by passing OPTAB_WIDEN as the `methods' argument.  (Before it
>>  was using the default value OPTAB_LIB_WIDEN.)
>> 
>> - If any of the tried expanded sequences contain a funcall, then the
>>  optimization is not attempted.
>
> How do libcalls appear in iff you specify OPTABS_WIDEN only?  Doesn’t
> that allow to simplify this and also use the sequence without a
> libcall?

If you pass OPTAB_WIDEN only then libcalls are not an option and (as
far as I can tell) expand_divmod returns NULL if a libcall is the only
possibility.

> Richard 
>
>> 
>> A couple of BPF tests are also added to make sure this doesn't break
>> at any point in the future.
>> 
>> Tested in bpf-unknown-none and x86_64-linux-gnu.
>> Regtested in x86_64-linux-gnu.  No regressions.
>> 
>> gcc/ChangeLog
>> 
>>* expr.cc (expand_expr_divmod): Avoid side-effects of trying
>>sequences involving funcalls in optimization.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>* gcc.target/bpf/divmod-funcall-1.c: New test.
>>* gcc.target/bpf/divmod-funcall-2.c: Likewise.
>> ---
>> gcc/expr.cc   | 44 +++
>> .../gcc.target/bpf/divmod-funcall-1.c |  8 
>> .../gcc.target/bpf/divmod-funcall-2.c |  8 
>> 3 files changed, 41 insertions(+), 19 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-funcall-1.c
>> create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-funcall-2.c
>> 
>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>> index d9407432ea5..4d4be5d7bda 100644
>> --- a/gcc/expr.cc
>> +++ b/gcc/expr.cc
>> @@ -9168,32 +9168,38 @@ expand_expr_divmod (tree_code code, machine_mode 
>> mode, tree treeop0,
>>   do_pending_stack_adjust ();
>>   start_sequence ();
>>   rtx uns_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
>> -   op0, op1, target, 1);
>> +   op0, op1, target, 1, OPTAB_WIDEN);
>>   rtx_insn *uns_insns = get_insns ()

Re: [PATCH] expr.cc: avoid unexpected side effects in expand_expr_divmod optimization

2022-12-08 Thread Jose E. Marchesi via Gcc-patches


Hi Jakub.

> On Thu, Dec 08, 2022 at 11:59:44AM +0100, Jose E. Marchesi via Gcc-patches 
> wrote:
>> gcc/ChangeLog
>> 
>>  * expr.cc (expand_expr_divmod): Avoid side-effects of trying
>>  sequences involving funcalls in optimization.
>
> That looks wrong.
> The globals for mentioned calls just shouldn't be emitted during expansion,
> especially if it is bigger annoyance than just having some extra symbols
> in the symbol table.
> expand_expr_divmod is definitely not the only place where something is
> expanded and later not used, lots of other places in the expander do that,
> and more importantly, there are over 80 optimization passes after expansion,
> many of them can remove code determined to be dead, and while lots of dead
> code is removed in GIMPLE optimizations already, definitely not all.
> So, rather than add hacks for this in a single spot, much better is to emit
> the globals only for stuff that is actually needed (so during final or
> immediately before it).

Yeah I see the point.

The culprit of the leaked .global seems to be a call to
assemble_external_libcall in emit_library_call_value_1:

expand_expr_divmod
  expand_divmod -> This will result in libcall
   sign_expand_divmod
 emit_library_call_value
   emit_library_call_value_1
 ...
 /* If this machine requires an external definition for library
functions, write one out.  */
 assemble_external_libcall (fun);
 ...

The documented purpose of assemble_external_libcall is, as stated in
output.h, to "Assemble a string constant".

So, it seems to me that emit_library_call_value should not assemble
anything, since it is used by expand functions whose expansions may be
eventually discarded.

However, simply removing that call to assemble_external_libcall causes
.global declarations not to be emitted even when the funcall is actually
emitted in final:

For:

  int foo(unsigned int len)
  {
return ((long)len) * 234 / 5;
  }

we get:

.file   "foo.c"
.text
<- NO .global __divdi3
.align  3
.global foo
.type   foo, @function
  foo:
mov32   %r1,%r1
mov %r2,5
mul %r1,234
call__divdi3
exit
  .size   foo, .-foo
  .ident  "GCC: (GNU) 13.0.0 20221207 (experimental)"

Note that BPF lacks signed division instructions.

So, I guess the right fix would be to call assemble_external_libcall
during final?  The `.global FOO' directive would be generated
immediately before the call sequence, but I guess that would be ok.

WDYT?


[PATCH] expr.cc: avoid unexpected side effects in expand_expr_divmod optimization

2022-12-08 Thread Jose E. Marchesi via Gcc-patches
The expand_expr_divmod function in expr.cc attempts to optimize cases
where both arguments of a division/modulus are known to be positive
when interpreted as signed.  In these cases, both signed division and
unsigned division will yield the same value, and therefore the
cheapest option can be used.

In order to determine what is the cheaper option in the current
target, expand_expr_divmod actually expands both a signed divmod and
an unsigned divmod using local "sequences":

  start_sequence ();
  ...
  expand_divmod (... signed ...);
  ...
  end_sequence ();

  start_sequence ();
  ...
  expand_divmod (... unsigned ...);
  ...
  end_sequence ();

And then compares the cost of each generated sequence, choosing the
best one.  Finally, it emits the selected expanded sequence and
returns the rtx with the result.

This approach has a caveat.  Some targets do not provide
division/modulus instructions.  BPF, for instance, provides
unsigned division/modulus, but not signed division/modulus.

In these cases, the expand_divmod tries can contain calls to funcalls.
For example, in BPF:

  start_sequence ();
  ...
  expand_divmod (... signed ...); -> This generates funcall to __divdi3
  ...
  end_sequence ();

  start_sequence ();
  ...
  expand_divmod (... unsigned ...); -> This generates direct `div' insn.
  ...
  end_sequence ();

The problem is that when a funcall is expanded, an accompanying global
symbol definition is written in the output stream:

  .global __divdi3

And this symbol definition remains in the compiled assembly file, even
if the sequence using the direct `div' instruction above is used.

This is particularly bad in BPF, because the kernel bpf loader chokes
on the spurious symbol __divdi3 and makes the resulting BPF object
unloadable (note that BPF objects are not linked before processed by
the kernel.)

In order to fix this, this patch modifies expand_expr_divmod in the
following way:

- When trying each sequence (signed, unsigned) the expand_divmod calls
  are told to _not_ use libcalls if everything else fails.  This is
  done by passing OPTAB_WIDEN as the `methods' argument.  (Before it
  was using the default value OPTAB_LIB_WIDEN.)

- If any of the tried expanded sequences contain a funcall, then the
  optimization is not attempted.

A couple of BPF tests are also added to make sure this doesn't break
at any point in the future.

Tested in bpf-unknown-none and x86_64-linux-gnu.
Regtested in x86_64-linux-gnu.  No regressions.

gcc/ChangeLog

* expr.cc (expand_expr_divmod): Avoid side-effects of trying
sequences involving funcalls in optimization.

gcc/testsuite/ChangeLog:

* gcc.target/bpf/divmod-funcall-1.c: New test.
* gcc.target/bpf/divmod-funcall-2.c: Likewise.
---
 gcc/expr.cc   | 44 +++
 .../gcc.target/bpf/divmod-funcall-1.c |  8 
 .../gcc.target/bpf/divmod-funcall-2.c |  8 
 3 files changed, 41 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-funcall-1.c
 create mode 100644 gcc/testsuite/gcc.target/bpf/divmod-funcall-2.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index d9407432ea5..4d4be5d7bda 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9168,32 +9168,38 @@ expand_expr_divmod (tree_code code, machine_mode mode, 
tree treeop0,
   do_pending_stack_adjust ();
   start_sequence ();
   rtx uns_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
-  op0, op1, target, 1);
+  op0, op1, target, 1, OPTAB_WIDEN);
   rtx_insn *uns_insns = get_insns ();
   end_sequence ();
   start_sequence ();
   rtx sgn_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
-  op0, op1, target, 0);
+  op0, op1, target, 0, OPTAB_WIDEN);
   rtx_insn *sgn_insns = get_insns ();
   end_sequence ();
-  unsigned uns_cost = seq_cost (uns_insns, speed_p);
-  unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
 
-  /* If costs are the same then use as tie breaker the other other
-factor.  */
-  if (uns_cost == sgn_cost)
-   {
- uns_cost = seq_cost (uns_insns, !speed_p);
- sgn_cost = seq_cost (sgn_insns, !speed_p);
-   }
-
-  if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
-   {
- emit_insn (uns_insns);
- return uns_ret;
-   }
-  emit_insn (sgn_insns);
-  return sgn_ret;
+  /* Do not try to optimize if any of the sequences tried above
+ resulted in a funcall.  */
+  if (uns_ret && sgn_ret)
+{
+  unsigned uns_cost = seq_cost (uns_insns, speed_p);
+  unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
+
+  /* If costs are the same then use as tie breaker the other
+ other factor.  */
+  if (uns_cost == sgn_cost)
+{
+

Re: [PATCH] bpf: Use enum for resolved overloaded builtins

2022-11-07 Thread Jose E. Marchesi via Gcc-patches


Hi David.

> Change several places in the eBPF backend dealing with overloaded
> built-in functions to consistently use the enum bpf_builtins type,
> rather than variously using integer constants or booleans. The result is
> easier to read and extend.
>
> Tested on bpf-unknown-none, no known regressions.
> OK to push?

OK.
Thanks for the patch.

>
> Thanks.
>
> gcc/
>
>   * config/bpf/bpf.cc (struct core_walk_data): Add field `which'...
>   (bpf_resolve_overloaded_builtin): ... set it here. Use values of enum
>   bpf_builtins for error checks.
>   (bpf_core_walk): Use values of enum bpf_builtins.
>   (bpf_core_newdecl): Likewise.
>   (bpf_expand_builtin): Likewise.
> ---
>  gcc/config/bpf/bpf.cc | 106 +++---
>  1 file changed, 59 insertions(+), 47 deletions(-)
>
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index fd4003c2bfc..16af2412bf6 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -1222,7 +1222,7 @@ bpf_expand_builtin (tree exp, rtx target 
> ATTRIBUTE_UNUSED,
>return gen_rtx_REG (ops[0].mode, BPF_R0);
>  }
>  
> -  else if (code == -1)
> +  else if (code == -BPF_BUILTIN_PRESERVE_ACCESS_INDEX)
>  {
>/* A resolved overloaded __builtin_preserve_access_index.  */
>tree arg = CALL_EXPR_ARG (exp, 0);
> @@ -1249,7 +1249,7 @@ bpf_expand_builtin (tree exp, rtx target 
> ATTRIBUTE_UNUSED,
>return expand_normal (arg);
>  }
>  
> -  else if (code == -2)
> +  else if (code == -BPF_BUILTIN_PRESERVE_FIELD_INFO)
>  {
>/* A resolved overloaded __builtin_preserve_field_info.  */
>tree src = CALL_EXPR_ARG (exp, 0);
> @@ -1444,28 +1444,37 @@ bpf_core_get_index (const tree node)
> __builtin_preserve_access_index.  */
>  
>  static tree
> -bpf_core_newdecl (tree type, bool is_pai)
> +bpf_core_newdecl (tree type, enum bpf_builtins which)
>  {
>tree rettype;
>char name[80];
>static unsigned long pai_count = 0;
>static unsigned long pfi_count = 0;
>  
> -  if (is_pai)
> +  switch (which)
>  {
> -  rettype = build_function_type_list (type, type, NULL);
> -  int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_");
> -  len = snprintf (name + len, sizeof (name) - len, "%lu", pai_count++);
> -}
> -  else
> -{
> -  rettype = build_function_type_list (unsigned_type_node, type,
> -   unsigned_type_node, NULL);
> -  int len = snprintf (name, sizeof (name), "%s", "__builtin_pfi_");
> -  len = snprintf (name + len, sizeof (name) - len, "%lu", pfi_count++);
> +case BPF_BUILTIN_PRESERVE_ACCESS_INDEX:
> +  {
> + rettype = build_function_type_list (type, type, NULL);
> + int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_");
> + len = snprintf (name + len, sizeof (name) - len, "%lu", pai_count++);
> +  }
> +  break;
> +
> +case BPF_BUILTIN_PRESERVE_FIELD_INFO:
> +  {
> + rettype = build_function_type_list (unsigned_type_node, type,
> + unsigned_type_node, NULL);
> + int len = snprintf (name, sizeof (name), "%s", "__builtin_pfi_");
> + len = snprintf (name + len, sizeof (name) - len, "%lu", pfi_count++);
> +  }
> +  break;
> +
> +default:
> +  gcc_unreachable ();
>  }
>  
> -  return add_builtin_function_ext_scope (name, rettype, is_pai ? -1 : -2,
> +  return add_builtin_function_ext_scope (name, rettype, -which,
>BUILT_IN_MD, NULL, NULL_TREE);
>  }
>  
> @@ -1492,6 +1501,7 @@ bpf_core_is_maybe_aggregate_access (tree expr)
>  
>  struct core_walk_data {
>location_t loc;
> +  enum bpf_builtins which;
>tree arg;
>  };
>  
> @@ -1501,7 +1511,6 @@ static tree
>  bpf_core_walk (tree *tp, int *walk_subtrees, void *data)
>  {
>struct core_walk_data *dat = (struct core_walk_data *) data;
> -  bool is_pai = dat->arg == NULL_TREE;
>  
>/* If this is a type, don't do anything. */
>if (TYPE_P (*tp))
> @@ -1510,19 +1519,21 @@ bpf_core_walk (tree *tp, int *walk_subtrees, void 
> *data)
>return NULL_TREE;
>  }
>  
> -  /* Build a new function call to a resolved builtin for the desired 
> operation.
> - If this is a preserve_field_info call, pass along the argument to the
> - resolved builtin call. */
> -  if (bpf_core_is_maybe_aggregate_access (*tp))
> -{
> -  tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp), is_pai);
> -  tree newcall;
> -  if (is_pai)
> - newcall = build_call_expr_loc (dat->loc, newdecl, 1, *tp);
> -  else
> - newcall = build_call_expr_loc (dat->loc, newdecl, 2, *tp, dat->arg);
> +  /* Build a new function call to a type-resolved temporary builtin for the
> + desired operation, and pass along args as necessary.  */
> +  tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp), dat->which);
>  
> -  *tp = newcall;
> +  if (dat->which == 

Re: [PATCH v3] bpf: add preserve_field_info builtin

2022-10-26 Thread Jose E. Marchesi via Gcc-patches


Hi David.

Thanks for the updates.
OK for master.

>>> I'm not sure whether this behavior is a known limitation or an
>>> oversight. In my opinion it makes more sense to error at compile time,
>>> because even after the loader patches the return value it still will
>>> not be correct for these cases.
>>>
>>> So for now I've set these cases to error out, but it would be just as
>>> simple to mimic the LLVM behavior. WDYT?
>> 
>> I would say it makes more sense to error out than to return invalid
>> data.
>> 
>> However, the divergence wrt LLVM is a concern.  What about keeping this
>> behavior in the GCC backend and simultaneously raise the issue in
>> bpf@vger?  If it was a design oversight and the change doesn't impact
>> kernel sources, they may be willing to change it.
>> 
>
> OK, I will raise the question there.
>
>>> [...]
>>> +@deftypefn {Built-in Function} unsigned int
>>> __builtin_preserve_field_info (@var{expr}, unsigned int @var{kind})
>>> +BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to
>>> +extract information to aid in struct/union relocations.  @var{expr} is
>>> +an access to a field of a struct or union. Depending on @var{kind}, 
>>> different
>>> +information is returned to the program. A CO-RE relocation for the access 
>>> in
>>> +@var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect.
>>> +
>>> +The following values are supported for @var{kind}:
>>> +@table @var
>>> +@item FIELD_BYTE_OFFSET = 0
>>> +The returned value is the offset, in bytes, of the field from the
>>> +beginning of the containing structure.
>> 
>> What about bit fields?  Is this the byte offset of the containing word?
>
> Yes.
>
>> 
>>> +@item FIELD_BYTE_SIZE = 1
>>> +The returned value is the size, in bytes, of the field.
>> 
>> For bit fields,  is this the size of the containing word?
>
> Right again. I have updated the docs for these two in v3.
>
>> 
>>> +@item FIELD_EXISTENCE = 2
>>> +The returned value is 1 if the field exists, 0 otherwise. Always 1 at
>>> +compile time.
>>> +
>>> +@item FIELD_SIGNEDNESS = 3
>>> +The returned value is 1 if the field is signed, 0 otherwise.
>>> +
>>> +@item FIELD_LSHIFT_U64 = 4
>>> +@itemx FIELD_RSHIFT_U64 = 5
>>> +Suppose the field were loaded into a value of FIELD_BYTE_SIZE bytes
>>> +and then zero or sign-extended to a 64-bit value. The returned value
>>> +is the number of bits of left or right shifting respectively that
>>> +would be needed to recover the original value of the field.
>> 
>> What are the semantics for bit fields?
>
> The semantics for bit fields are the same. These two are primarily
> useful for bit fields - a common case in eBPF programs is to read
> some field of a struct through a pointer. If it's a kernel struct
> that may change between versions and you are reading a bit field,
> you would use this builtin to get the eBPF loader to patch the
> appropriate steps to extract the field.
>
> So the process to read a bit field is the following:
>
>   1. read FIELD_BYTE_SIZE bytes and zero-extend the value of the
>  read into a u64
>   2. left shift the result FIELD_LSHIFT_U64 bits
>   3. if FIELD_SIGNEDNESS
>then arithmetic right-shift by FIELD_RSHIFT_U64 bits
>  otherwise
>logical right-shift by FIELD_RSHIFT_U64 bits
>
> Where all these FIELD_* values might change between kernels and
> need patching by the eBPF loader.
>
> I struggled a bit trying to find the best wording to describe this
> in the docs, and settled on adding some example code since I think
> that is the most clear.
>
> Please take a look at the updated version and let me know if you
> have any suggestions, I'm happy to hear them.
>
> Thanks
>
> ---
>
> [Changes from v2: update documentation in extend.texi]
>
> Add BPF __builtin_preserve_field_info. This builtin is used to extract
> information to facilitate struct and union relocations performed by the
> BPF loader, especially for bitfields.
>
> The builtin has the following signature:
>
>   unsigned int __builtin_preserve_field_info (EXPR, unsigned int KIND);
>
> Where EXPR is an expression accessing a field of a struct or union.
> Depending on KIND, different information is returned to the program. The
> supported values for KIND are as follows:
>
>   enum {
> FIELD_BYTE_OFFSET = 0,
> FIELD_BYTE_SIZE,
> FIELD_EXISTENCE,
> FIELD_SIGNEDNESS,
> FIELD_LSHIFT_U64,
> FIELD_RSHIFT_U64
>   };
>
> If -mco-re is in effect (explicitly or implicitly specified), a CO-RE
> relocation is added for the access in EXPR recording the relevant
> information according to KIND.
>
> gcc/
>
>   * config/bpf/bpf.cc: Support __builtin_preserve_field_info.
>   (enum bpf_builtins): Add new builtin.
>   (bpf_init_builtins): Likewise.
>   (bpf_core_field_info): New function.
>   (bpf_expand_builtin): Accommodate new builtin. Refactor adding new
>   relocation to...
>   (maybe_make_core_relo): ... here. New function.
>   

Re: [PATCH v2] bpf: add preserve_field_info builtin

2022-10-25 Thread Jose E. Marchesi via Gcc-patches


> Hi Jose,
>
> Thanks for your comments. I think I've addressed them all in the updated
> patch below.
>
>>>+  get_inner_reference (src, , , _off, , ,
>>>+   , );
>>
>>Since the information returned by the builtin is always constant
>>(positions, sizes) I think you will want to adjust the code for the
>>eventuality of variable positioned fields and also variable sized
>>fields.
>>
>>get_inner_reference sets var_off to a tree if the position of the field
>>is variable.  In these cases `bitpos' is relative to that position.
>>
>>Likewise, if the field is variable-sized, get_inner_reference sets
>>`mode' to BLKmode and `bitsize' to -1.
>>
>>I'm not sure what the built-in is supposed to do/return in these cases.
>>I guess it makes sense to error out, but what does LLVM do?
>
> I would have thought erroring out the only option, but it seems that
> LLVM will return a value from the builtin and record a CO-RE relocation
> as normal.
>
> What value will be returned depends of course on KIND, but from what
> I can tell it seems that such fields are treated as having an offset of
> 0 bits and/or a size of 0 bits. For example FIELD_BYTE_SIZE for a
> flexible-length array will return 0. FIELD_RSHIFT_U64 will be
> calculated as 64 - 0 = 64.
>
> This sort of makes sense if you expect that any BPF loader will honor
> the CO-RE relocations and patch the return value before the program is
> run, i.e. the actual values at compile time are irrelevant.
>
> But, I'm not sure that BPF loaders in practice actually _can_ patch the
> return value correctly. The source of information for resolving the
> relocations is the BTF. But the BTF won't have more information about
> variable position/size members. A flexible-length array for example in
> BTF is represented as an array type with 0 elements. So the size
> calculation when patching the relocation (looking at the impl in
> libbpf) will be elem_size * nelems = 0, and the 'patched' values will
> be the same as the unpatched.
>
> I'm not sure whether this behavior is a known limitation or an
> oversight. In my opinion it makes more sense to error at compile time,
> because even after the loader patches the return value it still will
> not be correct for these cases.
>
> So for now I've set these cases to error out, but it would be just as
> simple to mimic the LLVM behavior. WDYT?

I would say it makes more sense to error out than to return invalid
data.

However, the divergence wrt LLVM is a concern.  What about keeping this
behavior in the GCC backend and simultaneously raise the issue in
bpf@vger?  If it was a design oversight and the change doesn't impact
kernel sources, they may be willing to change it.

>>If I read this properly, for something like:
>>
>>__builtin_preserve_field_info (a = foo.bar + bar.baz, KIND)
>>
>>On one side CO-RE relocations are computed for both foo.bar and bar.baz
>>(I see bpf_core_compute does that) as expected.
>>
>>But then the builtin returns information that can only apply to one
>>access.  Which one?
>
> Expressions like this should not be accepted by the builtin. I didn't
> consider this case in v1 so it led to an ICE. Clang rejects this
> outright and errors with "argument 1 is not a field access". It is
> actually very strict about the expressions that are accepted, unlike
> __builtin_preserve_access_index.
>
> I have updated this implementation to behave more like clang in that
> it will reject any expression that isn't directly a field access. That
> even includes rejecting things like:
>
>   __builtin_preserve_field_info (, KIND)
>
> Since unlike preserve_access_index this builtin does not actually
> perform the operation in EXPR, it makes sense to enforce that EXPR must
> be exactly a single field access.

Ok, thanks.

> [...]
> +@deftypefn {Built-in Function} unsigned int __builtin_preserve_field_info 
> (@var{expr}, unsigned int @var{kind})
> +BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to
> +extract information to aid in struct/union relocations.  @var{expr} is
> +an access to a field of a struct or union. Depending on @var{kind}, different
> +information is returned to the program. A CO-RE relocation for the access in
> +@var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect.
> +
> +The following values are supported for @var{kind}:
> +@table @var
> +@item FIELD_BYTE_OFFSET = 0
> +The returned value is the offset, in bytes, of the field from the
> +beginning of the containing structure.

What about bit fields?  Is this the byte offset of the containing word?

> +@item FIELD_BYTE_SIZE = 1
> +The returned value is the size, in bytes, of the field.

For bit fields,  is this the size of the containing word?

> +@item FIELD_EXISTENCE = 2
> +The returned value is 1 if the field exists, 0 otherwise. Always 1 at
> +compile time.
> +
> +@item FIELD_SIGNEDNESS = 3
> +The returned value is 1 if the field is signed, 0 otherwise.
> +
> +@item FIELD_LSHIFT_U64 = 4
> +@itemx FIELD_RSHIFT_U64 = 5
> 

Re: [PATCH] bpf: add preserve_field_info builtin

2022-10-20 Thread Jose E. Marchesi via Gcc-patches


Hi David.
Thanks for the patch.  Please see a few comments below.

> @@ -975,6 +978,161 @@ static tree bpf_core_compute (tree, vec 
> *);
>  static int bpf_core_get_index (const tree);
>  static bool is_attr_preserve_access (tree);
>  
> +static void
> +maybe_make_core_relo (tree expr, enum btf_core_reloc_kind kind)

This function is missing a comment explaining what it does.

> +{
> +  /* If we are not targetting BPF CO-RE, do not make a relocation. We
> + might not be generating any debug info at all.  */
> +  if (!TARGET_BPF_CORE)
> +return;
> +
> +  auto_vec accessors;
> +  tree container = bpf_core_compute (expr, );
> +
> +  /* Any valid use of the builtin must have at least one access. Otherwise,
> + there is nothing to record and nothing to do. This is primarily a
> + guard against optimizations leading to unexpected expressions in the
> + argument of the builtin. For example, if the builtin is used to read
> + a field of a structure which can be statically determined to hold a
> + constant value, the argument to the builtin will be optimized to that
> + constant. This is OK, and means the builtin call is superfluous.
> + e.g.
> + struct S foo;
> + foo.a = 5;
> + int x = __preserve_access_index (foo.a);
> + ... do stuff with x
> + 'foo.a' in the builtin argument will be optimized to '5' with -01+.
> + This sequence does not warrant recording a CO-RE relocation.  */
> +
> +  if (accessors.length () < 1)
> +return;
> +  accessors.reverse ();
> +
> +  rtx_code_label *label = gen_label_rtx ();
> +  LABEL_PRESERVE_P (label) = 1;
> +  emit_label (label);
> +
> +  /* Determine what output section this relocation will apply to.
> + If this function is associated with a section, use that. Otherwise,
> + fall back on '.text'.  */
> +  const char * section_name;
> +  if (current_function_decl && DECL_SECTION_NAME (current_function_decl))
> +section_name = DECL_SECTION_NAME (current_function_decl);
> +  else
> +section_name = ".text";
> +
> +  /* Add the CO-RE relocation information to the BTF container.  */
> +  bpf_core_reloc_add (TREE_TYPE (container), section_name, , label,
> +   kind);
> +}
> +
> +/* Expand a call to __builtin_preserve_field_info by evaluating the requested
> +   information about SRC according to KIND, and return a tree holding
> +   the result.  */
> +
> +static tree
> +bpf_core_field_info (tree src, enum btf_core_reloc_kind kind)
> +{
> +  unsigned int result;
> +  poly_int64 bitsize, bitpos;
> +  tree var_off;
> +  machine_mode mode;
> +  int unsignedp, reversep, volatilep;
> +
> +  get_inner_reference (src, , , _off, , ,
> +, );

Since the information returned by the builtin is always constant
(positions, sizes) I think you will want to adjust the code for the
eventuality of variable positioned fields and also variable sized
fields.

get_inner_reference sets var_off to a tree if the position of the field
is variable.  In these cases `bitpos' is relative to that position.

Likewise, if the field is variable-sized, get_inner_reference sets
`mode' to BLKmode and `bitsize' to -1.

I'm not sure what the built-in is supposed to do/return in these cases.
I guess it makes sense to error out, but what does LLVM do?

> +
> +  /* Note: Use DECL_BIT_FIELD_TYPE rather than DECL_BIT_FIELD here, because 
> it
> + remembers whether the field in question was originally declared as a
> + bitfield, regardless of how it has been optimized.  */
> +  bool bitfieldp = (TREE_CODE (src) == COMPONENT_REF
> + && DECL_BIT_FIELD_TYPE (TREE_OPERAND (src, 1)));
> +
> +  unsigned int align = TYPE_ALIGN (TREE_TYPE (src));
> +  if (TREE_CODE (src) == COMPONENT_REF)
> +{
> +  tree field = TREE_OPERAND (src, 1);
> +  if (DECL_BIT_FIELD_TYPE (field))
> + align = TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field));
> +  else
> + align = TYPE_ALIGN (TREE_TYPE (field));
> +}
> +
> +  unsigned int start_bitpos = bitpos & ~(align - 1);
> +  unsigned int end_bitpos = start_bitpos + align;
> +
> +  switch (kind)
> +{
> +case BPF_RELO_FIELD_BYTE_OFFSET:
> +  {
> + if (bitfieldp)
> +   result = start_bitpos / 8;
> + else
> +   result = bitpos / 8;
> +  }
> +  break;
> +
> +case BPF_RELO_FIELD_BYTE_SIZE:
> +  {
> + if (bitfieldp)
> +   {
> + /* To match LLVM behavior, byte size of bitfields is recorded as
> +the full size of the base type. A 3-bit bitfield of type int is
> +therefore recorded as having a byte size of 4 bytes. */
> + result = end_bitpos - start_bitpos;
> + if (result & (result - 1))
> +   error ("unsupported field expression");
> + result = result / 8;
> +   }
> + else
> +   result = bitsize / 8;
> +  }
> +  break;
> +
> +case BPF_RELO_FIELD_EXISTS:
> +  /* The field always exists at compile time.  */
> +  result = 1;
> + 

Re: [PATCH V2] place `const volatile' objects in read-only sections

2022-09-29 Thread Jose E. Marchesi via Gcc-patches


> On 8/5/22 05:41, Jose E. Marchesi via Gcc-patches wrote:
>> [Changes from V1:
>> - Added a test.]
>>
>> It is common for C BPF programs to use variables that are implicitly
>> set by the BPF loader and run-time.  It is also necessary for these
>> variables to be stored in read-only storage so the BPF verifier
>> recognizes them as such.  This leads to declarations using both
>> `const' and `volatile' qualifiers, like this:
>>
>>const volatile unsigned char is_allow_list = 0;
>>
>> Where `volatile' is used to prevent the compiler from optimizing out
>> the variable, or turning it into a constant, and `const' to make sure
>> it is placed in .rodata.
>>
>> Now, it happens that:
>>
>> - GCC places `const volatile' objects in the .data section, under the
>>assumption that `volatile' somehow voids the `const'.
>>
>> - LLVM places `const volatile' objects in .rodata, under the
>>assumption that `volatile' is orthogonal to `const'.
>>
>> So there is a divergence that has practical consequences: it prevents
>> BPF programs compiled with GCC from working properly.
>>
>> When looking into this, I found this bugzilla:
>>
>>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25521
>>"change semantics of const volatile variables"
>>
>> which was filed back in 2005, long ago.  This report was already
>> asking to put `const volatile' objects in .rodata, questioning the
>> current behavior.
>>
>> While discussing this in the #gcc IRC channel I was pointed to the
>> following excerpt from the C18 spec:
>>
>> 6.7.3 Type qualifiers / 5 The properties associated with qualified
>>   types are meaningful only for expressions that are
>>   lvalues [note 135]
>>
>> 135) The implementation may place a const object that is not
>>  volatile in a read-only region of storage. Moreover, the
>>  implementation need not allocate storage for such an object if
>>  its address is never used.
>>
>> This footnote may be interpreted as if const objects that are volatile
>> shouldn't be put in read-only storage.  Even if I personally was not
>> very convinced of that interpretation (see my earlier comment in BZ
>> 25521) I filed the following issue in the LLVM tracker in order to
>> discuss the matter:
>>
>>https://github.com/llvm/llvm-project/issues/56468
>>
>> As you can see, Aaron Ballman, one of the LLVM hackers, asked the WG14
>> reflectors about this.  He reported that the reflectors don't think
>> footnote 135 has any normative value.
>>
>> So, not having a normative mandate on either direction, there are two
>> options:
>>
>> a) To change GCC to place `const volatile' objects in .rodata instead
>> of .data.
>>
>> b) To change LLVM to place `const volatile' objects in .data instead
>> of .rodata.
>>
>> Considering that:
>>
>> - One target (bpf-unknown-none) breaks with the current GCC behavior.
>>
>> - No target/platform relies on the GCC behavior, that we know.
>>
>> - Changing the LLVM behavior at this point would be very severely
>>traumatic for the BPF people and their users.
>>
>> I think the right thing to do at this point is a).
>> Therefore this patch.
>>
>> Regtested in x86_64-linux-gnu and bpf-unknown-none.
>> No regressions observed.
>>
>> gcc/ChangeLog:
>>
>>  PR middle-end/25521
>>  * varasm.cc (categorize_decl_for_section): Place `const volatile'
>>  objects in read-only sections.
>>  (default_select_section): Likewise.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR middle-end/25521
>>  * lib/target-supports.exp (check_effective_target_elf): Define.
>>  * gcc.dg/pr25521.c: New test.
>
> The best use I've heard for const volatile is stuff like hardware
> status registers which are readonly from the standpoint of the
> compiler, but which are changed by the hardware.   But for those,
> we're looking for the const to trigger compiler diagnostics if we try
> to write the value.  The volatile (of course) indicates the value
> changes behind our back.
>
> What you're trying to do seems to parallel that case reasonably well
> for the volatile aspect.  You want to force the compiler to read the
> data for every access.
>
> Your need for the const is a bit different.  Instead of looking to get
> a diagnostic out of the compiler if its modified, you need the data to 
> live in .rodata so the BPF verifier knows the co

[COMMITTED] bpf: remove unused variables from bpf_expand_{prologue,epilogue}

2022-09-06 Thread Jose E. Marchesi via Gcc-patches


gcc/ChangeLog:

* config/bpf/bpf.cc (bpf_expand_prologue): Remove unused automatic
`insn'.
(bpf_expand_epilogue): Likewise.
---
 gcc/config/bpf/bpf.cc | 18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 9cb56cfb287..51055651707 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -428,7 +428,6 @@ bpf_compute_frame_layout (void)
 void
 bpf_expand_prologue (void)
 {
-  rtx insn;
   HOST_WIDE_INT size;
 
   size = (cfun->machine->local_vars_size
@@ -468,7 +467,7 @@ bpf_expand_prologue (void)
   plus_constant (DImode,
  hard_frame_pointer_rtx,
  fp_offset - 8));
- insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
+ emit_move_insn (mem, gen_rtx_REG (DImode, regno));
  fp_offset -= 8;
}
}
@@ -481,15 +480,15 @@ bpf_expand_prologue (void)
  accessor.  */
   if (cfun->calls_alloca)
 {
-  insn = emit_move_insn (stack_pointer_rtx,
-hard_frame_pointer_rtx);
+  emit_move_insn (stack_pointer_rtx,
+  hard_frame_pointer_rtx);
 
   if (size > 0)
{
- insn = emit_insn (gen_rtx_SET (stack_pointer_rtx,
-gen_rtx_PLUS (Pmode,
-  stack_pointer_rtx,
-  GEN_INT (-size;
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
+  gen_rtx_PLUS (Pmode,
+stack_pointer_rtx,
+GEN_INT (-size;
}
 }
 }
@@ -504,7 +503,6 @@ bpf_expand_epilogue (void)
  not restoring callee-saved registers in BPF.  */
   if (TARGET_XBPF)
 {
-  rtx insn;
   int regno;
   int fp_offset = -cfun->machine->local_vars_size;
 
@@ -528,7 +526,7 @@ bpf_expand_epilogue (void)
   plus_constant (DImode,
  hard_frame_pointer_rtx,
  fp_offset - 8));
- insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
+ emit_move_insn (gen_rtx_REG (DImode, regno), mem);
  fp_offset -= 8;
}
}
-- 
2.30.2



Re: [PATCH] bpf: Fix unused variable warning

2022-09-05 Thread Jose E. Marchesi via Gcc-patches


Hi Jan.

> Building for the bpf-unknown-none target, recent GCCs will issue an unused
> variable warning as the REGNO_REG_CLASS macro doesn't actually use its
> argument. Reference the argument as (void) to silence the warning.
>
> .../gcc/configure --prefix=... --enable-werror-always --enable-languages=all 
> --disable-gcov --disable-shared --disable-threads --target=bpf-unknown-none 
> --without-headers
> [...]
> make V=1 all-gcc
> [...]
>
> /usr/lib/gcc-snapshot/bin/g++  -fno-PIE -c   -g -O2   -DIN_GCC  
> -DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti 
> -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
> -Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic 
> -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror 
> -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../gcc/gcc -I../../gcc/gcc/. 
> -I../../gcc/gcc/../include -I../../gcc/gcc/../libcpp/include 
> -I../../gcc/gcc/../libcody  -I../../gcc/gcc/../libdecnumber 
> -I../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber 
> -I../../gcc/gcc/../libbacktrace   -o regcprop.o -MT regcprop.o -MMD -MP -MF 
> ./.deps/regcprop.TPo ../../gcc/gcc/regcprop.cc
> ../../gcc/gcc/regcprop.cc: In function 'bool 
> copyprop_hardreg_forward_1(basic_block, value_data*)':
> ../../gcc/gcc/regcprop.cc:794:24: error: unused variable 'regno' 
> [-Werror=unused-variable]
>   794 |   unsigned int regno = REGNO (SET_SRC (set));
>   |^
> cc1plus: all warnings being treated as errors
> make[1]: *** [Makefile:1146: regcprop.o] Error 1
> make[1]: Leaving directory 
> '/var/lib/laminar/run/gcc-bpf-unknown-none/1/toolchain-build/gcc'
> make: *** [Makefile:4565: all-gcc] Error 2
>
>
> Ok for HEAD?

OK.  Thanks.


Re: [COMMITTED] bpf: define __bpf__ as well as __BPF__ as a target macro

2022-08-30 Thread Jose E. Marchesi via Gcc-patches


> On Mon, Aug 29, 2022 at 1:16 PM Jose E. Marchesi via Gcc-patches
>  wrote:
>>
>>
>> LLVM defines both __bpf__ and __BPF__ as target macros.
>> GCC was defining only __BPF__.
>>
>> This patch defines __bpf__ as a target macro for BPF.
>> Tested in bpf-unknown-none.
>>
>> gcc/ChangeLog:
>>
>> * config/bpf/bpf.cc (bpf_target_macros): Define __bpf__ as a
>> target macro.
>> ---
>>  gcc/config/bpf/bpf.cc | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
>> index 7e37e080808..9cb56cfb287 100644
>> --- a/gcc/config/bpf/bpf.cc
>> +++ b/gcc/config/bpf/bpf.cc
>> @@ -291,6 +291,7 @@ void
>>  bpf_target_macros (cpp_reader *pfile)
>>  {
>>builtin_define ("__BPF__");
>> +  builtin_define ("__bpf__");
>>
>>if (TARGET_BIG_ENDIAN)
>>  builtin_define ("__BPF_BIG_ENDIAN__");
>> --
>> 2.30.2
>>
>
> Having multiple choices in this case seems to just add confusion for
> users and makes code search slightly more inconvenient.
>
> How much code uses LLVM specific __bpf__? Can it be migrated? Should
> LLVM undefine the macro instead?

I agree that it would be better to support just one form of the target
macro.  Having two alternative forms can only lead to problems.

But I think the train left the station long ago to do any better: there
are files in the kernel tree that rely on __bpf__ and there may be BPF
programs around doing the same thing.


Re: [PATCH v2] bpf: handle anonymous members in CO-RE reloc [PR106745]

2022-08-29 Thread Jose E. Marchesi via Gcc-patches


> [changes from v1: simplify the new conditional logic as suggested.]
>
> The old method for computing a member index for a CO-RE relocation
> relied on a name comparison, which could SEGV if the member in question
> is itself part of an anonymous inner struct or union.
>
> This patch changes the index computation to not rely on a name, while
> maintaining the ability to account for other sibling fields which may
> not have a representation in BTF.
>
> Tested in bpf-unknown-none, no known regressions.
> OK?

OK, thank you.

> Thanks.
>
> gcc/ChangeLog:
>
>   PR target/106745
>   * config/bpf/coreout.cc (bpf_core_get_sou_member_index): Fix
>   computation of index for anonymous members.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/106745
>   * gcc.target/bpf/core-pr106745.c: New test.
> ---
>  gcc/config/bpf/coreout.cc| 16 +++
>  gcc/testsuite/gcc.target/bpf/core-pr106745.c | 30 
>  2 files changed, 40 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-pr106745.c
>
> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
> index cceaaa969cc..8897a045ea1 100644
> --- a/gcc/config/bpf/coreout.cc
> +++ b/gcc/config/bpf/coreout.cc
> @@ -207,7 +207,6 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (TREE_CODE (node) == FIELD_DECL)
>  {
>const tree container = DECL_CONTEXT (node);
> -  const char * name = IDENTIFIER_POINTER (DECL_NAME (node));
>  
>/* Lookup the CTF type info for the containing type.  */
>dw_die_ref die = lookup_type_die (container);
> @@ -222,16 +221,21 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
>  return -1;
>  
> +  tree field = TYPE_FIELDS (container);
>int i = 0;
>ctf_dmdef_t * dmd;
>for (dmd = dtd->dtd_u.dtu_members;
> dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
>  {
> -  if (get_btf_id (dmd->dmd_type) > BTF_MAX_TYPE)
> -continue;
> -  if (strcmp (dmd->dmd_name, name) == 0)
> -return i;
> -  i++;
> +   bool field_has_btf = get_btf_id (dmd->dmd_type) <= BTF_MAX_TYPE;
> +
> +   if (field == node)
> + return field_has_btf ? i : -1;
> +
> +   if (field_has_btf)
> + i++;
> +
> +   field = DECL_CHAIN (field);
>  }
>  }
>return -1;
> diff --git a/gcc/testsuite/gcc.target/bpf/core-pr106745.c 
> b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> new file mode 100644
> index 000..9d347006a69
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -gbtf -dA -mco-re" } */
> +
> +struct weird
> +{
> +  struct
> +  {
> +int b;
> +  };
> +
> +  char x;
> +
> +  union
> +  {
> +int a;
> +int c;
> +  };
> +};
> +
> +
> +int test (struct weird *arg) {
> +  int *x = __builtin_preserve_access_index (>b);
> +  int *y = __builtin_preserve_access_index (>c);
> +
> +  return *x + *y;
> +}
> +
> +
> +/* { dg-final { scan-assembler-times "ascii \"0:0:0.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */
> +/* { dg-final { scan-assembler-times "ascii \"0:2:1.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */


[COMMITTED] bpf: define __bpf__ as well as __BPF__ as a target macro

2022-08-29 Thread Jose E. Marchesi via Gcc-patches


LLVM defines both __bpf__ and __BPF__ as target macros.
GCC was defining only __BPF__.

This patch defines __bpf__ as a target macro for BPF.
Tested in bpf-unknown-none.

gcc/ChangeLog:

* config/bpf/bpf.cc (bpf_target_macros): Define __bpf__ as a
target macro.
---
 gcc/config/bpf/bpf.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 7e37e080808..9cb56cfb287 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -291,6 +291,7 @@ void
 bpf_target_macros (cpp_reader *pfile)
 {
   builtin_define ("__BPF__");
+  builtin_define ("__bpf__");
 
   if (TARGET_BIG_ENDIAN)
 builtin_define ("__BPF_BIG_ENDIAN__");
-- 
2.30.2



Re: [PATCH] bpf: handle anonymous members in CO-RE reloc [PR106745]

2022-08-29 Thread Jose E. Marchesi via Gcc-patches


Hi David.

> The old method for computing a member index for a CO-RE relocation
> relied on a name comparison, which could SEGV if the member in question
> is itself part of an anonymous inner struct or union.
>
> This patch changes the index computation to not rely on a name, while
> maintaining the ability to account for other sibling fields which may
> not have a representation in BTF.
>
> Tested in bpf-unknown-none, no known regressions.
> OK?
>
> Thanks.
>
> gcc/ChangeLog:
>
>   PR target/106745
>   * config/bpf/coreout.cc (bpf_core_get_sou_member_index): Fix
>   computation of index for anonymous members.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/106745
>   * gcc.target/bpf/core-pr106745.c: New test.
> ---
>  gcc/config/bpf/coreout.cc| 19 +
>  gcc/testsuite/gcc.target/bpf/core-pr106745.c | 30 
>  2 files changed, 44 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-pr106745.c
>
> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
> index cceaaa969cc..caad4380fa1 100644
> --- a/gcc/config/bpf/coreout.cc
> +++ b/gcc/config/bpf/coreout.cc
> @@ -207,7 +207,6 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (TREE_CODE (node) == FIELD_DECL)
>  {
>const tree container = DECL_CONTEXT (node);
> -  const char * name = IDENTIFIER_POINTER (DECL_NAME (node));
>  
>/* Lookup the CTF type info for the containing type.  */
>dw_die_ref die = lookup_type_die (container);
> @@ -222,16 +221,26 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
>  return -1;
>  
> +  tree field = TYPE_FIELDS (container);
>int i = 0;
>ctf_dmdef_t * dmd;
>for (dmd = dtd->dtd_u.dtu_members;
> dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
>  {
>if (get_btf_id (dmd->dmd_type) > BTF_MAX_TYPE)
> -continue;
> -  if (strcmp (dmd->dmd_name, name) == 0)
> -return i;
> -  i++;
> + {
> +   /* This field does not have a BTF representation.  */
> +   if (field == node)
> + return -1;
> + }
> +   else
> + {
> +   if (field == node)
> + return i;
> +   i++;
> + }
> +
> +   field = DECL_CHAIN (field);
>  }

I find the logic of the new conditional a little difficult to follow.
What about something like this instead:

for (dmd = dtd->dtd_u.dtu_members;
 dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
  {
bool field_has_btf = get_btf_id (dmd->dmd_type) <= BTF_MAX_TYPE;

if (field == node)
  return field_has_btf ? i : -1;

if (field_has_btf)
  i++;
field = DECL_CHAIN (field);
  }

WDYT?

>  }
>return -1;
> diff --git a/gcc/testsuite/gcc.target/bpf/core-pr106745.c 
> b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> new file mode 100644
> index 000..9d347006a69
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -gbtf -dA -mco-re" } */
> +
> +struct weird
> +{
> +  struct
> +  {
> +int b;
> +  };
> +
> +  char x;
> +
> +  union
> +  {
> +int a;
> +int c;
> +  };
> +};
> +
> +
> +int test (struct weird *arg) {
> +  int *x = __builtin_preserve_access_index (&arg->b);
> +  int *y = __builtin_preserve_access_index (&arg->c);
> +
> +  return *x + *y;
> +}
> +
> +
> +/* { dg-final { scan-assembler-times "ascii \"0:0:0.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */
> +/* { dg-final { scan-assembler-times "ascii \"0:2:1.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */


[PATCH V3] Add warning options -W[no-]compare-distinct-pointer-types

2022-08-27 Thread Jose E. Marchesi via Gcc-patches


GCC emits pedwarns unconditionally when comparing pointers of
different types, for example:

  int xdp_context (struct xdp_md *xdp)
{
void *data = (void *)(long)xdp->data;
__u32 *metadata = (void *)(long)xdp->data_meta;
__u32 ret;

if (metadata + 1 > data)
  return 0;
return 1;
   }

  /home/jemarch/foo.c: In function ‘xdp_context’:
  /home/jemarch/foo.c:15:20: warning: comparison of distinct pointer types 
lacks a cast
 15 |   if (metadata + 1 > data)
 |^

LLVM supports an option -W[no-]compare-distinct-pointer-types that can
be used in order to enable or disable the emission of such warnings.
It is enabled by default.

This patch adds the same options to GCC.

Documentation and testsuite updates included.
Regtested in x86_64-linux-gnu.
No regressions observed.

gcc/ChangeLog:

PR c/106537
* doc/invoke.texi (Option Summary): Mention
-Wcompare-distinct-pointer-types under `Warning Options'.
(Warning Options): Document -Wcompare-distinct-pointer-types.

gcc/c-family/ChangeLog:

PR c/106537
* c.opt (Wcompare-distinct-pointer-types): New option.

gcc/c/ChangeLog:

PR c/106537
* c-typeck.cc (build_binary_op): Warning on comparing distinct
pointer types only when -Wcompare-distinct-pointer-types.

gcc/testsuite/ChangeLog:

PR c/106537
* gcc.c-torture/compile/pr106537-1.c: New test.
* gcc.c-torture/compile/pr106537-2.c: Likewise.
* gcc.c-torture/compile/pr106537-3.c: Likewise.
---
 gcc/c-family/c.opt|  4 
 gcc/c/c-typeck.cc |  6 ++---
 gcc/doc/invoke.texi   |  6 +
 .../gcc.c-torture/compile/pr106537-1.c| 23 +++
 .../gcc.c-torture/compile/pr106537-2.c| 21 +
 .../gcc.c-torture/compile/pr106537-3.c| 21 +
 6 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-3.c

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index f776efd39d8..729b86c0287 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1848,6 +1848,10 @@ Winvalid-imported-macros
 C++ ObjC++ Var(warn_imported_macros) Warning
 Warn about macros that have conflicting header units definitions.
 
+Wcompare-distinct-pointer-types
+C ObjC Var(warn_compare_distinct_pointer_types) Warning Init(1)
+Warn if pointers of distinct types are compared without a cast.
+
 flang-info-include-translate
 C++ Var(note_include_translate_yes)
 Note #include directives translated to import declarations.
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index ee891ee33c2..dc7e8514c47 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -12422,7 +12422,7 @@ build_binary_op (location_t location, enum tree_code 
code,
  else
/* Avoid warning about the volatile ObjC EH puts on decls.  */
if (!objc_ok)
- pedwarn (location, 0,
+ pedwarn (location, OPT_Wcompare_distinct_pointer_types,
   "comparison of distinct pointer types lacks a cast");
 
  if (result_type == NULL_TREE)
@@ -12562,8 +12562,8 @@ build_binary_op (location_t location, enum tree_code 
code,
  int qual = ENCODE_QUAL_ADDR_SPACE (as_common);
  result_type = build_pointer_type
  (build_qualified_type (void_type_node, qual));
- pedwarn (location, 0,
-  "comparison of distinct pointer types lacks a cast");
+  pedwarn (location, OPT_Wcompare_distinct_pointer_types,
+   "comparison of distinct pointer types lacks a cast");
}
}
   else if (code0 == POINTER_TYPE && null_pointer_constant_p (orig_op1))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6131bfa7acf..5ef9dbe4bbe 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -341,6 +341,7 @@ Objective-C and Objective-C++ Dialects}.
 -Wcast-align  -Wcast-align=strict  -Wcast-function-type  -Wcast-qual  @gol
 -Wchar-subscripts @gol
 -Wclobbered  -Wcomment @gol
+-Wcompare-distinct-pointer-types @gol
 -Wconversion  -Wno-coverage-mismatch  -Wno-cpp @gol
 -Wdangling-else  -Wdangling-pointer  -Wdangling-pointer=@var{n}  @gol
 -Wdate-time @gol
@@ -8648,6 +8649,11 @@ programs.
 Warn for variables that might be changed by @code{longjmp} or
 @code{vfork}.  This warning is also enabled by @option{-Wextra}.
 
+@item -Wcompare-distinct-pointer-types
+@opindex Wcompare-distinct-pointer-types
+Warn if pointers of distinct types are compared without a cast.  This
+warning is enabled by default.
+
 @item -Wconversion
 @opindex Wconversion
 @opindex Wno-conversion
diff --git 

Re: [PATCH V2] Add warning options -W[no-]compare-distinct-pointer-types

2022-08-26 Thread Jose E. Marchesi via Gcc-patches


> On Thu, 18 Aug 2022, Jose E. Marchesi via Gcc-patches wrote:
>
>> diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
>> index de8780a1502..04af02add37 100644
>> --- a/gcc/c/c-typeck.cc
>> +++ b/gcc/c/c-typeck.cc
>> @@ -12397,7 +12397,8 @@ build_binary_op (location_t location, enum tree_code 
>> code,
>>  }
>>else
>>  /* Avoid warning about the volatile ObjC EH puts on decls.  */
>> -if (!objc_ok)
>> +if (!objc_ok
>> +&& warn_compare_distinct_pointer_types)
>>pedwarn (location, 0,
>> "comparison of distinct pointer types lacks a cast");
>>  
>> @@ -12517,8 +12518,9 @@ build_binary_op (location_t location, enum tree_code 
>> code,
>>int qual = ENCODE_QUAL_ADDR_SPACE (as_common);
>>result_type = build_pointer_type
>>(build_qualified_type (void_type_node, qual));
>> -  pedwarn (location, 0,
>> -   "comparison of distinct pointer types lacks a cast");
>> +  if (warn_compare_distinct_pointer_types)
>> +pedwarn (location, 0,
>> + "comparison of distinct pointer types lacks a 
>> cast");
>
> I think this should use OPT_Wcompare_distinct_pointer_types in place of 0, 
> and then you shouldn't need to check warn_compare_distinct_pointer_types 
> (as well as the diagnostic then automatically telling the user what option 
> controls it).

Ouch, better to use pedwarn the way it is intended to be used yes.
Sorry for the silly oversight :)

Sending a V3 with this modification.


[COMMITTED] bpf: facilitate constant propagation of function addresses

2022-08-24 Thread Jose E. Marchesi via Gcc-patches


eBPF effectively supports two kinds of call instructions:

- The so called pseudo-calls ("bpf to bpf").
- External calls ("bpf to kernel").

The BPF call instruction always gets an immediate argument, whose
interpretation varies depending on the purpose of the instruction:

- For pseudo-calls, the immediate argument is interpreted as a
  32-bit PC-relative displacement measured in number of 64-bit words
  minus one.

- For external calls, the immediate argument is interpreted as the
  identification of a kernel helper.

In order to differentiate both flavors of CALL instructions the SRC
field of the instruction (otherwise unused) is abused as an opcode;
if the field holds 0 the instruction is an external call, if it holds
BPF_PSEUDO_CALL the instruction is a pseudo-call.

C-to-BPF toolchains, including the GNU toolchain, use the following
practical heuristic at assembly time in order to determine what kind
of CALL instruction to generate: call instructions requiring a fixup
at assembly time are interpreted as pseudo-calls.  This means that in
practice a call instruction involving symbols at assembly time (such
as `call foo') is assembled into a pseudo-call instruction, whereas
something like `call 12' is assembled into an external call
instruction.

In both cases, the argument of CALL is an immediate: at the time of
writing eBPF lacks support for indirect calls, i.e. there is no
call-to-register instruction.

This is the reason why BPF programs, in practice, rely on certain
optimizations to happen in order to generate calls to immediates.
This is a typical example involving a kernel helper:

  static void * (*bpf_map_lookup_elem)(void *map, const void *key)
= (void *) 1;

  int foo (...)
  {
char *ret;

ret = bpf_map_lookup_elem (args...);
if (ret)
  return 1;
return 0;
  }

Note how the code above relies on the compiler to do constant
propagation so the call to bpf_map_lookup_elem can be compiled to a
`call 1' instruction.

While GCC provides a kernel_helper function declaration attribute that
can be used in a robust way to tell GCC to generate an external call
regardless of optimization level and any other consideration, the Linux
kernel bpf_helpers.h file relies on tricks like the above.

This patch modifies the BPF backend to prevent SSA sparse constant
propagation from being "undone" by the expander loading the function
address into a register.  A new test is also added.

Tested in bpf-unknown-linux-gnu.
No regressions.

gcc/ChangeLog:

PR target/106733
* config/bpf/bpf.cc (bpf_legitimate_address_p): Recognize integer
constants as legitimate addresses for functions.
(bpf_small_register_classes_for_mode_p): Define target hook.

gcc/testsuite/ChangeLog:

PR target/106733
* gcc.target/bpf/constant-calls.c: Rename to ...
* gcc.target/bpf/constant-calls-1.c: and modify to not expect
failure anymore.
* gcc.target/bpf/constant-calls-2.c: New test.
---
 gcc/config/bpf/bpf.cc | 21 ++-
 .../{constant-calls.c => constant-calls-1.c}  |  1 -
 .../gcc.target/bpf/constant-calls-2.c | 16 ++
 3 files changed, 36 insertions(+), 2 deletions(-)
 rename gcc/testsuite/gcc.target/bpf/{constant-calls.c => constant-calls-1.c} 
(88%)
 create mode 100644 gcc/testsuite/gcc.target/bpf/constant-calls-2.c

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 6a0e3bbca9e..7e37e080808 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -659,12 +659,15 @@ bpf_address_base_p (rtx x, bool strict)
target machine for a memory operand of mode MODE.  */
 
 static bool
-bpf_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
+bpf_legitimate_address_p (machine_mode mode,
  rtx x,
  bool strict)
 {
   switch (GET_CODE (x))
 {
+case CONST_INT:
+  return (mode == FUNCTION_MODE);
+
 case REG:
   return bpf_address_base_p (x, strict);
 
@@ -1311,6 +1314,22 @@ bpf_core_walk (tree *tp, int *walk_subtrees, void *data)
   return NULL_TREE;
 }
 
+/* Implement target hook small_register_classes_for_mode_p.  */
+
+static bool
+bpf_small_register_classes_for_mode_p (machine_mode mode)
+{
+  if (TARGET_XBPF)
+return 1;
+  else
+/* Avoid putting function addresses in registers, as calling these
+   is not supported in eBPF.  */
+return (mode != FUNCTION_MODE);
+}
+
+#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
+  bpf_small_register_classes_for_mode_p
 
 /* Implement TARGET_RESOLVE_OVERLOADED_BUILTIN (see gccint manual section
Target Macros::Misc.).
diff --git a/gcc/testsuite/gcc.target/bpf/constant-calls.c 
b/gcc/testsuite/gcc.target/bpf/constant-calls-1.c
similarity index 88%
rename from gcc/testsuite/gcc.target/bpf/constant-calls.c
rename to gcc/testsuite/gcc.target/bpf/constant-calls-1.c
index 84612a92ae9..6effc7dfdd4 100644
--- 

Re: [PATCH V2] place `const volatile' objects in read-only sections

2022-08-18 Thread Jose E. Marchesi via Gcc-patches


ping

> [Changes from V1:
> - Added a test.]
>
> It is common for C BPF programs to use variables that are implicitly
> set by the BPF loader and run-time.  It is also necessary for these
> variables to be stored in read-only storage so the BPF verifier
> recognizes them as such.  This leads to declarations using both
> `const' and `volatile' qualifiers, like this:
>
>   const volatile unsigned char is_allow_list = 0;
>
> Where `volatile' is used to prevent the compiler from optimizing out
> the variable, or turning it into a constant, and `const' to make sure
> it is placed in .rodata.
>
> Now, it happens that:
>
> - GCC places `const volatile' objects in the .data section, under the
>   assumption that `volatile' somehow voids the `const'.
>
> - LLVM places `const volatile' objects in .rodata, under the
>   assumption that `volatile' is orthogonal to `const'.
>
> So there is a divergence, that has practical consequences: it makes
> BPF programs compiled with GCC to not work properly.
>
> When looking into this, I found this bugzilla:
>
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25521
>   "change semantics of const volatile variables"
>
> which was filed back in 2005, long ago.  This report was already
> asking to put `const volatile' objects in .rodata, questioning the
> current behavior.
>
> While discussing this in the #gcc IRC channel I was pointed out to the
> following excerpt from the C18 spec:
>
>6.7.3 Type qualifiers / 5 The properties associated with qualified
>  types are meaningful only for expressions that are
>  lvalues [note 135]
>
>135) The implementation may place a const object that is not
> volatile in a read-only region of storage. Moreover, the
> implementation need not allocate storage for such an object if
> its address is never used.
>
> This footnote may be interpreted as if const objects that are volatile
> shouldn't be put in read-only storage.  Even if I personally was not
> very convinced of that interpretation (see my earlier comment in BZ
> 25521) I filed the following issue in the LLVM tracker in order to
> discuss the matter:
>
>   https://github.com/llvm/llvm-project/issues/56468
>
> As you can see, Aaron Ballman, one of the LLVM hackers, asked the WG14
> reflectors about this.  He reported that the reflectors don't think
> footnote 135 has any normative value.
>
> So, not having a normative mandate on either direction, there are two
> options:
>
> a) To change GCC to place `const volatile' objects in .rodata instead
>of .data.
>
> b) To change LLVM to place `const volatile' objects in .data instead
>of .rodata.
>
> Considering that:
>
> - One target (bpf-unknown-none) breaks with the current GCC behavior.
>
> - No target/platform relies on the GCC behavior, that we know.
>
> - Changing the LLVM behavior at this point would be very severely
>   traumatic for the BPF people and their users.
>
> I think the right thing to do at this point is a).
> Therefore this patch.
>
> Regtested in x86_64-linux-gnu and bpf-unknown-none.
> No regressions observed.
>
> gcc/ChangeLog:
>
>   PR middle-end/25521
>   * varasm.cc (categorize_decl_for_section): Place `const volatile'
>   objects in read-only sections.
>   (default_select_section): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>   PR middle-end/25521
>   * lib/target-supports.exp (check_effective_target_elf): Define.
>   * gcc.dg/pr25521.c: New test.
> ---
>  gcc/testsuite/gcc.dg/pr25521.c| 10 ++
>  gcc/testsuite/lib/target-supports.exp | 10 ++
>  gcc/varasm.cc |  3 ---
>  3 files changed, 20 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr25521.c
>
> diff --git a/gcc/testsuite/gcc.dg/pr25521.c b/gcc/testsuite/gcc.dg/pr25521.c
> new file mode 100644
> index 000..74fe2ae6626
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr25521.c
> @@ -0,0 +1,10 @@
> +/* PR middle-end/25521 - place `const volatile' objects in read-only
> +   sections.
> +
> +   { dg-require-effective-target elf }
> +   { dg-do compile } */
> +
> +const volatile int foo = 30;
> +
> +
> +/* { dg-final { scan-assembler "\\.rodata" } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 04a2a8e8659..c663d59264b 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -483,6 +483,16 @@ proc check_effective_target_alias { } {
>  }
>  }
>  
> +# Returns 1 if the target uses the ELF object format, 0 otherwise.
> +
> +proc check_effective_target_elf { } {
> +if { [gcc_target_object_format] == "elf" } {
> + return 1;
> +} else {
> + return 0;
> +}
> +}
> +
>  # Returns 1 if the target toolchain supports ifunc, 0 otherwise.
>  
>  proc check_ifunc_available { } {
> diff --git a/gcc/varasm.cc b/gcc/varasm.cc
> index 4db8506b106..7864db11faf 100644
> --- a/gcc/varasm.cc
> +++ 

[PATCH V2] Add warning options -W[no-]compare-distinct-pointer-types

2022-08-18 Thread Jose E. Marchesi via Gcc-patches


Hi Joseph.

> On Fri, 5 Aug 2022, Jose E. Marchesi via Gcc-patches wrote:
>
>> +Wcompare-distinct-pointer-types
>> +C C++ Var(warn_compare_distinct_pointer_types) Warning Init(1)
>> +Warn if pointers of distinct types are compared without a cast.
>
> There's no implementation for C++ in this patch, so the option shouldn't 
> be supported for C++ in c.opt.  However, C options are normally supported 
> for Objective-C; unless you have a specific reason why Objective-C support 
> for this option would be a bad idea, "C ObjC" would be appropriate for the 
> languages.

Thanks for the review!
See a V2 of the patch with the suggested change below.



GCC emits pedwarns unconditionally when comparing pointers of
different types, for example:

  int xdp_context (struct xdp_md *xdp)
{
void *data = (void *)(long)xdp->data;
__u32 *metadata = (void *)(long)xdp->data_meta;
__u32 ret;

if (metadata + 1 > data)
  return 0;
return 1;
   }

  /home/jemarch/foo.c: In function ‘xdp_context’:
  /home/jemarch/foo.c:15:20: warning: comparison of distinct pointer types 
lacks a cast
 15 |   if (metadata + 1 > data)
 |^

LLVM supports an option -W[no-]compare-distinct-pointer-types that can
be used in order to enable or disable the emission of such warnings.
It is enabled by default.

This patch adds the same options to GCC.

Documentation and testsuite updates included.
Regtested in x86_64-linux-gnu.
No regressions observed.

gcc/ChangeLog:

PR c/106537
* doc/invoke.texi (Option Summary): Mention
-Wcompare-distinct-pointer-types under `Warning Options'.
(Warning Options): Document -Wcompare-distinct-pointer-types.

gcc/c-family/ChangeLog:

PR c/106537
* c.opt (Wcompare-distinct-pointer-types): New option.

gcc/c/ChangeLog:

PR c/106537
* c-typeck.cc (build_binary_op): Warning on comparing distinct
pointer types only when -Wcompare-distinct-pointer-types.

gcc/testsuite/ChangeLog:

PR c/106537
* gcc.c-torture/compile/pr106537-1.c: New test.
* gcc.c-torture/compile/pr106537-2.c: Likewise.
* gcc.c-torture/compile/pr106537-3.c: Likewise.
---
 gcc/c-family/c.opt|  4 
 gcc/c/c-typeck.cc |  8 ---
 gcc/doc/invoke.texi   |  6 +
 .../gcc.c-torture/compile/pr106537-1.c| 23 +++
 .../gcc.c-torture/compile/pr106537-2.c| 21 +
 .../gcc.c-torture/compile/pr106537-3.c| 21 +
 6 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-3.c

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index dfdebd596ef..c401c06ec0b 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1844,6 +1844,10 @@ Winvalid-imported-macros
 C++ ObjC++ Var(warn_imported_macros) Warning
 Warn about macros that have conflicting header units definitions.
 
+Wcompare-distinct-pointer-types
+C ObjC Var(warn_compare_distinct_pointer_types) Warning Init(1)
+Warn if pointers of distinct types are compared without a cast.
+
 flang-info-include-translate
 C++ Var(note_include_translate_yes)
 Note #include directives translated to import declarations.
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index de8780a1502..04af02add37 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -12397,7 +12397,8 @@ build_binary_op (location_t location, enum tree_code 
code,
}
  else
/* Avoid warning about the volatile ObjC EH puts on decls.  */
-   if (!objc_ok)
+   if (!objc_ok
+&& warn_compare_distinct_pointer_types)
  pedwarn (location, 0,
   "comparison of distinct pointer types lacks a cast");
 
@@ -12517,8 +12518,9 @@ build_binary_op (location_t location, enum tree_code 
code,
  int qual = ENCODE_QUAL_ADDR_SPACE (as_common);
  result_type = build_pointer_type
  (build_qualified_type (void_type_node, qual));
- pedwarn (location, 0,
-  "comparison of distinct pointer types lacks a cast");
+  if (warn_compare_distinct_pointer_types)
+pedwarn (location, 0,
+ "comparison of distinct pointer types lacks a cast");
}
}
   else if (code0 == POINTER_TYPE && null_pointer_constant_p (orig_op1))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1ac81ad0bb4..88b4af14d8c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -341,6 +341,7 @@ Objective-C an

[PATCH] Add warning options -W[no-]compare-distinct-pointer-types

2022-08-05 Thread Jose E. Marchesi via Gcc-patches


GCC emits pedwarns unconditionally when comparing pointers of
different types, for example:

  int xdp_context (struct xdp_md *xdp)
{
void *data = (void *)(long)xdp->data;
__u32 *metadata = (void *)(long)xdp->data_meta;
__u32 ret;

if (metadata + 1 > data)
  return 0;
return 1;
   }

  /home/jemarch/foo.c: In function ‘xdp_context’:
  /home/jemarch/foo.c:15:20: warning: comparison of distinct pointer types 
lacks a cast
 15 |   if (metadata + 1 > data)
 |^

LLVM supports an option -W[no-]compare-distinct-pointer-types that can
be used in order to enable or disable the emission of such warnings.
It is enabled by default.

This patch adds the same options to GCC.

Documentation and testsuite updates included.
Regtested in x86_64-linux-gnu.
No regressions observed.

gcc/ChangeLog:

PR c/106537
* doc/invoke.texi (Option Summary): Mention
-Wcompare-distinct-pointer-types under `Warning Options'.
(Warning Options): Document -Wcompare-distinct-pointer-types.

gcc/c-family/ChangeLog:

PR c/106537
* c.opt (Wcompare-distinct-pointer-types): New option.

gcc/c/ChangeLog:

PR c/106537
* c-typeck.cc (build_binary_op): Warning on comparing distinct
pointer types only when -Wcompare-distinct-pointer-types.

gcc/testsuite/ChangeLog:

PR c/106537
* gcc.c-torture/compile/pr106537-1.c: New test.
* gcc.c-torture/compile/pr106537-2.c: Likewise.
* gcc.c-torture/compile/pr106537-3.c: Likewise.
---
 gcc/c-family/c.opt   |  4 
 gcc/c/c-typeck.cc|  8 +---
 gcc/doc/invoke.texi  |  6 ++
 gcc/testsuite/gcc.c-torture/compile/pr106537-1.c | 23 +++
 gcc/testsuite/gcc.c-torture/compile/pr106537-2.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr106537-3.c | 21 +
 6 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106537-3.c

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 44e1a60ce24..54e08e83eb2 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1844,6 +1844,10 @@ Winvalid-imported-macros
 C++ ObjC++ Var(warn_imported_macros) Warning
 Warn about macros that have conflicting header units definitions.
 
+Wcompare-distinct-pointer-types
+C C++ Var(warn_compare_distinct_pointer_types) Warning Init(1)
+Warn if pointers of distinct types are compared without a cast.
+
 flang-info-include-translate
 C++ Var(note_include_translate_yes)
 Note #include directives translated to import declarations.
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 8514488b7a5..40a66530586 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -12397,7 +12397,8 @@ build_binary_op (location_t location, enum tree_code 
code,
}
  else
/* Avoid warning about the volatile ObjC EH puts on decls.  */
-   if (!objc_ok)
+   if (!objc_ok
+&& warn_compare_distinct_pointer_types)
  pedwarn (location, 0,
   "comparison of distinct pointer types lacks a cast");
 
@@ -12517,8 +12518,9 @@ build_binary_op (location_t location, enum tree_code 
code,
  int qual = ENCODE_QUAL_ADDR_SPACE (as_common);
  result_type = build_pointer_type
  (build_qualified_type (void_type_node, qual));
- pedwarn (location, 0,
-  "comparison of distinct pointer types lacks a cast");
+  if (warn_compare_distinct_pointer_types)
+pedwarn (location, 0,
+ "comparison of distinct pointer types lacks a cast");
}
}
   else if (code0 == POINTER_TYPE && null_pointer_constant_p (orig_op1))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 863580b3710..a4a594a336d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -341,6 +341,7 @@ Objective-C and Objective-C++ Dialects}.
 -Wcast-align  -Wcast-align=strict  -Wcast-function-type  -Wcast-qual  @gol
 -Wchar-subscripts @gol
 -Wclobbered  -Wcomment @gol
+-Wcompare-distinct-pointer-types @gol
 -Wconversion  -Wno-coverage-mismatch  -Wno-cpp @gol
 -Wdangling-else  -Wdangling-pointer  -Wdangling-pointer=@var{n}  @gol
 -Wdate-time @gol
@@ -8629,6 +8630,11 @@ programs.
 Warn for variables that might be changed by @code{longjmp} or
 @code{vfork}.  This warning is also enabled by @option{-Wextra}.
 
+@item -Wcompare-distinct-pointer-types
+@opindex Wcompare-distinct-pointer-types
+Warn if pointers of distinct types are compared without a cast.  This
+warning is enabled by default.
+
 @item -Wconversion
 @opindex Wconversion
 @opindex 

[PATCH V2] place `const volatile' objects in read-only sections

2022-08-05 Thread Jose E. Marchesi via Gcc-patches


[Changes from V1:
- Added a test.]

It is common for C BPF programs to use variables that are implicitly
set by the BPF loader and run-time.  It is also necessary for these
variables to be stored in read-only storage so the BPF verifier
recognizes them as such.  This leads to declarations using both
`const' and `volatile' qualifiers, like this:

  const volatile unsigned char is_allow_list = 0;

Where `volatile' is used to prevent the compiler from optimizing out
the variable, or turning it into a constant, and `const' to make sure
it is placed in .rodata.

Now, it happens that:

- GCC places `const volatile' objects in the .data section, under the
  assumption that `volatile' somehow voids the `const'.

- LLVM places `const volatile' objects in .rodata, under the
  assumption that `volatile' is orthogonal to `const'.

So there is a divergence that has practical consequences: BPF programs
compiled with GCC do not work properly.

When looking into this, I found this bugzilla:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25521
  "change semantics of const volatile variables"

which was filed back in 2005, long ago.  This report was already
asking to put `const volatile' objects in .rodata, questioning the
current behavior.

While discussing this in the #gcc IRC channel I was pointed to the
following excerpt from the C18 spec:

   6.7.3 Type qualifiers / 5 The properties associated with qualified
 types are meaningful only for expressions that are
 lvalues [note 135]

   135) The implementation may place a const object that is not
volatile in a read-only region of storage. Moreover, the
implementation need not allocate storage for such an object if
its address is never used.

This footnote may be interpreted as if const objects that are volatile
shouldn't be put in read-only storage.  Even if I personally was not
very convinced of that interpretation (see my earlier comment in BZ
25521) I filed the following issue in the LLVM tracker in order to
discuss the matter:

  https://github.com/llvm/llvm-project/issues/56468

As you can see, Aaron Ballman, one of the LLVM hackers, asked the WG14
reflectors about this.  He reported that the reflectors don't think
footnote 135 has any normative value.

So, not having a normative mandate in either direction, there are two
options:

a) To change GCC to place `const volatile' objects in .rodata instead
   of .data.

b) To change LLVM to place `const volatile' objects in .data instead
   of .rodata.

Considering that:

- One target (bpf-unknown-none) breaks with the current GCC behavior.

- No target/platform relies on the GCC behavior, that we know of.

- Changing the LLVM behavior at this point would be very severely
  traumatic for the BPF people and their users.

I think the right thing to do at this point is a).
Therefore this patch.

Regtested in x86_64-linux-gnu and bpf-unknown-none.
No regressions observed.

gcc/ChangeLog:

PR middle-end/25521
* varasm.cc (categorize_decl_for_section): Place `const volatile'
objects in read-only sections.
(default_select_section): Likewise.

gcc/testsuite/ChangeLog:

PR middle-end/25521
* lib/target-supports.exp (check_effective_target_elf): Define.
* gcc.dg/pr25521.c: New test.
---
 gcc/testsuite/gcc.dg/pr25521.c| 10 ++
 gcc/testsuite/lib/target-supports.exp | 10 ++
 gcc/varasm.cc |  3 ---
 3 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr25521.c

diff --git a/gcc/testsuite/gcc.dg/pr25521.c b/gcc/testsuite/gcc.dg/pr25521.c
new file mode 100644
index 000..74fe2ae6626
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr25521.c
@@ -0,0 +1,10 @@
+/* PR middle-end/25521 - place `const volatile' objects in read-only
+   sections.
+
+   { dg-require-effective-target elf }
+   { dg-do compile } */
+
+const volatile int foo = 30;
+
+
+/* { dg-final { scan-assembler "\\.rodata" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 04a2a8e8659..c663d59264b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -483,6 +483,16 @@ proc check_effective_target_alias { } {
 }
 }
 
+# Returns 1 if the target uses the ELF object format, 0 otherwise.
+
+proc check_effective_target_elf { } {
+if { [gcc_target_object_format] == "elf" } {
+   return 1;
+} else {
+   return 0;
+}
+}
+
 # Returns 1 if the target toolchain supports ifunc, 0 otherwise.
 
 proc check_ifunc_available { } {
diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index 4db8506b106..7864db11faf 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -6971,7 +6971,6 @@ default_select_section (tree decl, int reloc,
 {
   if (! ((flag_pic && reloc)
 || !TREE_READONLY (decl)
-|| TREE_SIDE_EFFECTS (decl)
 || !TREE_CONSTANT (decl)))
return 

Re: [PATCH] place `const volatile' objects in read-only sections

2022-08-05 Thread Jose E. Marchesi via Gcc-patches


Hi Richard.

> On Fri, Aug 5, 2022 at 3:27 AM Jose E. Marchesi via Gcc-patches
>  wrote:
>>
>>
>> Hi people!
>>
>> First of all, a bit of context.
>>
>> It is common for C BPF programs to use variables that are implicitly set
>> by the underlying BPF machinery and not by the program itself.  It is
>> also necessary for these variables to be stored in read-only storage so
>> the BPF verifier recognizes them as such.  This leads to declarations
>> using both `const' and `volatile' qualifiers, like this:
>>
>>   const volatile unsigned char is_allow_list = 0;
>>
>> Where `volatile' is used to prevent the compiler from optimizing out the
>> variable, or turning it into a constant, and `const' to make sure it is
>> placed in .rodata.
>>
>> Now, it happens that:
>>
>> - GCC places `const volatile' objects in the .data section, under the
>>   assumption that `volatile' somehow voids the `const'.
>>
>> - LLVM places `const volatile' objects in .rodata, under the
>>   assumption that `volatile' is orthogonal to `const'.
>>
>> So there is a divergence, and this divergence has practical
>> consequences: it makes BPF programs compiled with GCC to not work
>> properly.
>>
>> When looking into this, I found this bugzilla:
>>
>>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25521
>>   "change semantics of const volatile variables"
>>
>> which was filed back in 2005.  This report was already asking to put
>> `const volatile' objects in .rodata, questioning the current behavior.
>>
>> While discussing this in the #gcc IRC channel I was pointed to the
>> following excerpt from the C18 spec:
>>
>>6.7.3 Type qualifiers / 5 The properties associated with qualified
>>  types are meaningful only for expressions that are
>>  lvalues [note 135]
>>
>>
>>135) The implementation may place a const object that is not
>> volatile in a read-only region of storage. Moreover, the
>> implementation need not allocate storage for such an object if
>> its address is never used.
>>
>> This footnote may be interpreted as if const objects that are volatile
>> shouldn't be put in read-only storage.  Even if I was not very convinced
>> of that interpretation (see my earlier comment in BZ 25521) I filed the
>> following issue in the LLVM tracker in order to discuss the matter:
>>
>>   https://github.com/llvm/llvm-project/issues/56468
>>
>> As you can see, Aaron Ballman, one of the LLVM hackers, asked the WG14
>> reflectors about this.  He reported back that the reflectors consider
>> footnote 135 to have no normative value.
>>
>> So, not having a normative mandate on either direction, there are two
>> options:
>>
>> a) To change GCC to place `const volatile' objects in .rodata instead
>>of .data.
>>
>> b) To change LLVM to place `const volatile' objects in .data instead
>>of .rodata.
>>
>> Considering that:
>>
>> - One target (bpf-unknown-none) breaks with the current GCC behavior.
>>
>> - No target/platform relies on the GCC behavior, that we know.  (And it
>>   is unlikely there is any, at least for targets also supported by
>>   LLVM.)
>>
>> - Changing the LLVM behavior at this point would be very severely
>>   traumatic for the BPF people and their users.
>>
>> I think the right thing to do is a).
>> Therefore this patch.
>>
>> A note about the patch itself:
>>
>> I am not that familiar with the middle-end and in this patch I am
>> assuming that a `var|constructor + SIDE_EFFECTS' is the result of
>> `volatile' (or an equivalent language construction) and nothing else.
>> It would be good if some middle-end wizard could confirm this.
>
> Yes, for decls that sounds correct.  For a CTOR it just means
> re-evaluation is not safe.

Thanks for confirming.

>> Regtested in x86_64-linux-gnu and bpf-unknown-none.
>> No regressions observed.
>
> I think this warrants a testcase.

Sure, will add one.
What would be the right testsuite?  gcc.dg?

> I'm not sure I agree about the whole thing though, I'm leaving it
> to Joseph.
>
>> gcc/ChangeLog:
>>
>> PR middle-end/25521
>> * varasm.cc (categorize_decl_for_section): Place `const volatile'
>> objects in read-only sections.
>> (default_select_section): Likewise.
>> ---
>>  gcc/varasm.cc | 3 ---
>>  1 file changed, 3 deletions(-)
>>
>> diff

[PATCH] place `const volatile' objects in read-only sections

2022-08-04 Thread Jose E. Marchesi via Gcc-patches


Hi people!

First of all, a bit of context.

It is common for C BPF programs to use variables that are implicitly set
by the underlying BPF machinery and not by the program itself.  It is
also necessary for these variables to be stored in read-only storage so
the BPF verifier recognizes them as such.  This leads to declarations
using both `const' and `volatile' qualifiers, like this:

  const volatile unsigned char is_allow_list = 0;

Where `volatile' is used to prevent the compiler from optimizing out the
variable, or turning it into a constant, and `const' to make sure it is
placed in .rodata.

Now, it happens that:

- GCC places `const volatile' objects in the .data section, under the
  assumption that `volatile' somehow voids the `const'.

- LLVM places `const volatile' objects in .rodata, under the
  assumption that `volatile' is orthogonal to `const'.

So there is a divergence, and this divergence has practical
consequences: it makes BPF programs compiled with GCC to not work
properly.

When looking into this, I found this bugzilla:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25521
  "change semantics of const volatile variables"

which was filed back in 2005.  This report was already asking to put
`const volatile' objects in .rodata, questioning the current behavior.

While discussing this in the #gcc IRC channel I was pointed to the
following excerpt from the C18 spec:

   6.7.3 Type qualifiers / 5 The properties associated with qualified
 types are meaningful only for expressions that are
 lvalues [note 135]


   135) The implementation may place a const object that is not
volatile in a read-only region of storage. Moreover, the
implementation need not allocate storage for such an object if
its address is never used.

This footnote may be interpreted as if const objects that are volatile
shouldn't be put in read-only storage.  Even if I was not very convinced
of that interpretation (see my earlier comment in BZ 25521) I filed the
following issue in the LLVM tracker in order to discuss the matter:

  https://github.com/llvm/llvm-project/issues/56468

As you can see, Aaron Ballman, one of the LLVM hackers, asked the WG14
reflectors about this.  He reported back that the reflectors consider
footnote 135 to have no normative value.

So, not having a normative mandate on either direction, there are two
options:

a) To change GCC to place `const volatile' objects in .rodata instead
   of .data.

b) To change LLVM to place `const volatile' objects in .data instead
   of .rodata.

Considering that:

- One target (bpf-unknown-none) breaks with the current GCC behavior.

- No target/platform relies on the GCC behavior, that we know.  (And it
  is unlikely there is any, at least for targets also supported by
  LLVM.)

- Changing the LLVM behavior at this point would be very severely
  traumatic for the BPF people and their users.

I think the right thing to do is a).
Therefore this patch.

A note about the patch itself:

I am not that familiar with the middle-end and in this patch I am
assuming that a `var|constructor + SIDE_EFFECTS' is the result of
`volatile' (or an equivalent language construction) and nothing else.
It would be good if some middle-end wizard could confirm this.

Regtested in x86_64-linux-gnu and bpf-unknown-none.
No regressions observed.

gcc/ChangeLog:

PR middle-end/25521
* varasm.cc (categorize_decl_for_section): Place `const volatile'
objects in read-only sections.
(default_select_section): Likewise.
---
 gcc/varasm.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/varasm.cc b/gcc/varasm.cc
index 4db8506b106..7864db11faf 100644
--- a/gcc/varasm.cc
+++ b/gcc/varasm.cc
@@ -6971,7 +6971,6 @@ default_select_section (tree decl, int reloc,
 {
   if (! ((flag_pic && reloc)
 || !TREE_READONLY (decl)
-|| TREE_SIDE_EFFECTS (decl)
 || !TREE_CONSTANT (decl)))
return readonly_data_section;
 }
@@ -7005,7 +7004,6 @@ categorize_decl_for_section (const_tree decl, int reloc)
   if (bss_initializer_p (decl))
ret = SECCAT_BSS;
   else if (! TREE_READONLY (decl)
-  || TREE_SIDE_EFFECTS (decl)
   || (DECL_INITIAL (decl)
   && ! TREE_CONSTANT (DECL_INITIAL (decl
{
@@ -7046,7 +7044,6 @@ categorize_decl_for_section (const_tree decl, int reloc)
   else if (TREE_CODE (decl) == CONSTRUCTOR)
 {
   if ((reloc & targetm.asm_out.reloc_rw_mask ())
- || TREE_SIDE_EFFECTS (decl)
  || ! TREE_CONSTANT (decl))
ret = SECCAT_DATA;
   else
-- 
2.30.2



[COMMITED] testsuite: btf: fix regexps in btf-int-1.c

2022-08-03 Thread Jose E. Marchesi via Gcc-patches


The regexps in the test btf-int-1.c were not working properly with the
commenting style of at least one target: powerpc64le-linux-gnu.  This
patch changes the test to use better regexps.

Tested in bpf-unknown-none, x86_64-linux-gnu and powerpc64le-linux-gnu.
Pushed to master as obvious.

gcc/testsuite/ChangeLog:

PR testsuite/106515
* gcc.dg/debug/btf/btf-int-1.c: Fix regexps in
scan-assembler-times.
---
 gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c
index 87d9758e9cb..e1ed198131a 100644
--- a/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c
+++ b/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c
@@ -18,10 +18,10 @@
 /* { dg-final { scan-assembler-times "\[\t \]0x100\[\t 
\]+\[^\n\]*btt_info" 9 } } */
 
 /* Check the signed flags, but not bit size. */
-/* { dg-final { scan-assembler-times "\[\t \]0x1..\[\t 
\]+\[^\n\]*bti_encoding" 4 } } */
-/* { dg-final { scan-assembler-times "\[\t \]0x..\[\t \]+\[^\n\]*bti_encoding" 
3 } } */
-/* { dg-final { scan-assembler-times "\[\t \]0x.\[\t \]+\[^\n\]*bti_encoding" 
1 } } */
-/* { dg-final { scan-assembler-times "\[\t \]0x4..\[\t 
\]+\[^\n\]*bti_encoding" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x1\[0-9a-zA-Z\]{2}\[\t 
\]+\[^\n\]*bti_encoding" 4 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x\[0-9a-zA-Z\]{2}\[\t 
\]+\[^\n\]*bti_encoding" 3 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x\[0-9a-zA-Z\]\[\t 
\]+\[^\n\]*bti_encoding" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x4\[0-9a-zA-Z\]{2}\[\t 
\]+\[^\n\]*bti_encoding" 1 } } */
 
 /* Check that there is a string entry for each type name.  */
 /* { dg-final { scan-assembler-times "ascii \"unsigned char.0\"\[\t 
\]+\[^\n\]*btf_string" 1 } } */
-- 
2.30.2



Re: [PATCH] btf: do not use the CHAR `encoding' bit for BTF

2022-08-02 Thread Jose E. Marchesi via Gcc-patches


> On 8/2/22 08:42, Jose E. Marchesi wrote:
>> 
>>> On 7/26/22 14:58, Indu Bhagat wrote:
>>>> On 7/22/22 4:23 AM, Jose E. Marchesi via Gcc-patches wrote:
>>>>>
>>>>> Contrary to CTF and our previous expectations, as per [1], turns out
>>>>> that in BTF:
>>>>>
>>>>> 1) The `encoding' field in integer types shall not be treated as a
>>>>> bitmap, but as an enumerated, i.e. these bits are exclusive to each
>>>>> other.
>>>>>
>>>>> 2) The CHAR bit in `encoding' shall _not_ be set when emitting types
>>>>> for char nor `unsigned char'.
>>>>>
>>>>
>>>> Hmm...well.  At this time, I suggest we make a note of this in the btf.h 
>>>> for posterity that BTF_INT_CHAR is to not be used (i.e., BTF_INT_CHAR 
>>>> should not be set for char / unsigned char).
>>>
>>> Agreed it would be good to add this note.
>> 
>> Hmm, I am not sure such a comment actually belongs to include/btf.h,
>> which is not specific to the compiler and is supposed to reflect the BTF
>> format per-se.  The CHAR bit is documented in the kernel documentation
>> and it may be used at some point by bpflib, or who knows what.
>
> OK you make a good point.
>
> In that case the patch LGTM to commit. Thanks!

Pushed to master.
Thanks!

>> 
>> That's why I put the comment in btfout.cc instead, to make it clear that
>> BTF_INT_CHAR is indeed not to be set for char / unsigned char by the
>> compiler:
>> 
>>>>> +  /* In BTF the CHAR `encoding' seems to not be used, so clear it
>>>>> + here.  */
>>>>> +  dtd->dtd_u.dtu_enc.cte_format &= ~BTF_INT_CHAR;


Re: [PATCH] btf: do not use the CHAR `encoding' bit for BTF

2022-08-02 Thread Jose E. Marchesi via Gcc-patches


> On 7/26/22 14:58, Indu Bhagat wrote:
>> On 7/22/22 4:23 AM, Jose E. Marchesi via Gcc-patches wrote:
>>>
>>> Contrary to CTF and our previous expectations, as per [1], turns out
>>> that in BTF:
>>>
>>> 1) The `encoding' field in integer types shall not be treated as a
>>> bitmap, but as an enumerated, i.e. these bits are exclusive to each
>>> other.
>>>
>>> 2) The CHAR bit in `encoding' shall _not_ be set when emitting types
>>> for char nor `unsigned char'.
>>>
>> 
>> Hmm...well.  At this time, I suggest we make a note of this in the btf.h 
>> for posterity that BTF_INT_CHAR is to not be used (i.e., BTF_INT_CHAR 
>> should not be set for char / unsigned char).
>
> Agreed it would be good to add this note.

Hmm, I am not sure such a comment actually belongs to include/btf.h,
which is not specific to the compiler and is supposed to reflect the BTF
format per-se.  The CHAR bit is documented in the kernel documentation
and it may be used at some point by bpflib, or who knows what.

That's why I put the comment in btfout.cc instead, to make it clear that
BTF_INT_CHAR is indeed not to be set for char / unsigned char by the
compiler:

>>> +  /* In BTF the CHAR `encoding' seems to not be used, so clear it
>>> + here.  */
>>> +  dtd->dtd_u.dtu_enc.cte_format &= ~BTF_INT_CHAR;


Re: [PATCH V2] btf: emit linkage information in BTF_KIND_FUNC entries

2022-08-01 Thread Jose E. Marchesi via Gcc-patches


> On 7/12/22 08:13, Jose E. Marchesi via Gcc-patches wrote:
>> 
>> The kernel bpftool expects BTF_KIND_FUNC entries in BTF to include an
>> annotation reflecting the linkage of functions (static, global).  For
>> whatever reason they abuse the `vlen' field of the BTF_KIND_FUNC entry
>> instead of adding a variable-part to the record like it is done with
>> other entry kinds.
>> 
>> This patch makes GCC include this linkage info in BTF_KIND_FUNC
>> entries.
>> 
>> Tested in bpf-unknown-none target.
>
> LGTM
> Thanks.

Pushed to master.
Thanks.

>> 
>> gcc/ChangeLog:
>> 
>>  PR debug/106263
>>  * ctfc.h (struct ctf_dtdef): Add field linkage.
>>  * ctfc.cc (ctf_add_function): Set ctti_linkage.
>>  * dwarf2ctf.cc (gen_ctf_function_type): Pass a linkage for
>>  function types and subprograms.
>>  * btfout.cc (btf_asm_func_type): Emit linkage information for the
>>  function.
>>  (btf_dtd_emit_preprocess_cb): Propagate the linkage information
>>  for functions.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>  PR debug/106263
>>  * gcc.dg/debug/btf/btf-function-4.c: New test.
>>  * gcc.dg/debug/btf/btf-function-5.c: Likewise.
>> ---
>>  gcc/btfout.cc   |  6 +-
>>  gcc/ctfc.cc |  3 ++-
>>  gcc/ctfc.h  |  3 ++-
>>  gcc/dwarf2ctf.cc|  4 +++-
>>  gcc/testsuite/gcc.dg/debug/btf/btf-function-4.c | 14 ++
>>  gcc/testsuite/gcc.dg/debug/btf/btf-function-5.c | 14 ++
>>  6 files changed, 40 insertions(+), 4 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-function-4.c
>>  create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-function-5.c
>> 
>> diff --git a/gcc/btfout.cc b/gcc/btfout.cc
>> index 31af50521da..594cba84910 100644
>> --- a/gcc/btfout.cc
>> +++ b/gcc/btfout.cc
>> @@ -463,6 +463,7 @@ btf_dtd_emit_preprocess_cb (ctf_container_ref ctfc, 
>> ctf_dtdef_ref dtd)
>>ctf_dtdef_ref func_dtd = ggc_cleared_alloc ();
>>func_dtd->dtd_data = dtd->dtd_data;
>>func_dtd->dtd_data.ctti_type = dtd->dtd_type;
>> +  func_dtd->linkage = dtd->linkage;
>>  
>>vec_safe_push (funcs, func_dtd);
>>num_types_created++;
>> @@ -740,7 +741,10 @@ static void
>>  btf_asm_func_type (ctf_dtdef_ref dtd)
>>  {
>>dw2_asm_output_data (4, dtd->dtd_data.ctti_name, "btt_name");
>> -  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0, 0), "btt_info");
>> +  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0,
>> + dtd->linkage),
>> +   "btt_info: kind=%u, kflag=%u, linkage=%u",
>> +   BTF_KIND_FUNC, 0, dtd->linkage);
>>dw2_asm_output_data (4, get_btf_id (dtd->dtd_data.ctti_type), "btt_type");
>>  }
>>  
>> diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc
>> index f24e7bff948..9773358a475 100644
>> --- a/gcc/ctfc.cc
>> +++ b/gcc/ctfc.cc
>> @@ -777,7 +777,7 @@ ctf_add_function_arg (ctf_container_ref ctfc, dw_die_ref 
>> func,
>>  ctf_id_t
>>  ctf_add_function (ctf_container_ref ctfc, uint32_t flag, const char * name,
>>const ctf_funcinfo_t * ctc, dw_die_ref die,
>> -  bool from_global_func)
>> +  bool from_global_func, int linkage)
>>  {
>>ctf_dtdef_ref dtd;
>>ctf_id_t type;
>> @@ -791,6 +791,7 @@ ctf_add_function (ctf_container_ref ctfc, uint32_t flag, 
>> const char * name,
>>type = ctf_add_generic (ctfc, flag, name, , die);
>>  
>>dtd->from_global_func = from_global_func;
>> +  dtd->linkage = linkage;
>>dtd->dtd_data.ctti_info = CTF_TYPE_INFO (CTF_K_FUNCTION, flag, vlen);
>>/* Caller must make sure CTF types for ctc->ctc_return are already added. 
>>  */
>>dtd->dtd_data.ctti_type = (uint32_t) ctc->ctc_return;
>> diff --git a/gcc/ctfc.h b/gcc/ctfc.h
>> index 001e544ef08..bcf3a43ae1b 100644
>> --- a/gcc/ctfc.h
>> +++ b/gcc/ctfc.h
>> @@ -161,6 +161,7 @@ struct GTY ((for_user)) ctf_dtdef
>>ctf_itype_t dtd_data;   /* Type node.  */
>>bool from_global_func; /* Whether this type was added from a global
>>  function.  */
>> +  uint32_t linkage;   /* Used in function types.  0=local, 
>> 1=global.  */
>> 

Re: [PATCH 1/2] Allow subtarget customization of CC1_SPEC

2022-07-22 Thread Jose E. Marchesi via Gcc-patches


Hi Sebastian.

I find "subtarget" confusing in this context.

If it is about rtems.h, linux.h or sol2.h, wouldn't this be better
called OS_CC1_SPEC or similar?  These files specify configurations that
apply to a set of targets, not to a subset of a target...

> gcc/ChangeLog:
>
>   * gcc.cc (SUBTARGET_CC1_SPEC): Define if not defined.
>   (CC1_SPEC): Define to SUBTARGET_CC1_SPEC.
>   * config/arm/arm.h (CC1_SPEC): Remove.
>   * config/arc/arc.h (CC1_SPEC): Append SUBTARGET_CC1_SPEC.
>   * config/cris/cris.h (CC1_SPEC): Likewise.
>   * config/frv/frv.h (CC1_SPEC): Likewise.
>   * config/i386/i386.h (CC1_SPEC): Likewise.
>   * config/ia64/ia64.h (CC1_SPEC): Likewise.
>   * config/lm32/lm32.h (CC1_SPEC): Likewise.
>   * config/m32r/m32r.h (CC1_SPEC): Likewise.
>   * config/mcore/mcore.h (CC1_SPEC): Likewise.
>   * config/microblaze/microblaze.h: Likewise.
>   * config/nds32/nds32.h (CC1_SPEC): Likewise.
>   * config/nios2/nios2.h (CC1_SPEC): Likewise.
>   * config/pa/pa.h (CC1_SPEC): Likewise.
>   * config/rs6000/sysv4.h (CC1_SPEC): Likewise.
>   * config/rx/rx.h (CC1_SPEC): Likewise.
>   * config/sparc/sparc.h (CC1_SPEC): Likewise.
> ---
>  gcc/config/arc/arc.h   | 3 ++-
>  gcc/config/arm/arm.h   | 4 
>  gcc/config/cris/cris.h | 3 ++-
>  gcc/config/frv/frv.h   | 2 +-
>  gcc/config/i386/i386.h | 2 +-
>  gcc/config/ia64/ia64.h | 2 +-
>  gcc/config/lm32/lm32.h | 2 +-
>  gcc/config/m32r/m32r.h | 2 +-
>  gcc/config/mcore/mcore.h   | 2 +-
>  gcc/config/microblaze/microblaze.h | 3 ++-
>  gcc/config/nds32/nds32.h   | 2 +-
>  gcc/config/nios2/nios2.h   | 2 +-
>  gcc/config/pa/pa.h | 2 +-
>  gcc/config/rs6000/sysv4.h  | 3 ++-
>  gcc/config/rx/rx.h | 3 ++-
>  gcc/config/sparc/sparc.h   | 2 +-
>  gcc/gcc.cc | 8 +++-
>  17 files changed, 27 insertions(+), 20 deletions(-)
>
> diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
> index 539a1662084..177287b11aa 100644
> --- a/gcc/config/arc/arc.h
> +++ b/gcc/config/arc/arc.h
> @@ -68,7 +68,8 @@ along with GCC; see the file COPYING3.  If not see
>  #define CC1_SPEC "%{EB:%{EL:%emay not use both -EB and -EL}} \
>  %{EB:-mbig-endian} %{EL:-mlittle-endian} \
>  %{G*}\
> -"
> +"\
> +SUBTARGET_CC1_SPEC
>  extern const char *arc_cpu_to_as (int argc, const char **argv);
>  
>  #define EXTRA_SPEC_FUNCTIONS \
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index f479540812a..24fdf7fde23 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -91,10 +91,6 @@ extern tree arm_bf16_ptr_type_node;
>  #undef  CPP_SPEC
>  #define CPP_SPEC "%(subtarget_cpp_spec)"
>  
> -#ifndef CC1_SPEC
> -#define CC1_SPEC ""
> -#endif
> -
>  /* This macro defines names of additional specifications to put in the specs
> that can be used in various specifications like CC1_SPEC.  Its definition
> is an initializer with a subgrouping for each command option.
> diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
> index 6edfe13d92c..ed89b3fa6b0 100644
> --- a/gcc/config/cris/cris.h
> +++ b/gcc/config/cris/cris.h
> @@ -135,7 +135,8 @@ extern int cris_cpu_version;
>%{metrax100:-march=v8}\
>%{march=*:-march=%*}\
>%{mcpu=*:-mcpu=%*}\
> -  %(cc1_subtarget)"
> +  %(cc1_subtarget)" \
> +  SUBTARGET_CC1_SPEC
>  
>  /* For the cris-*-elf subtarget.  */
>  #define CRIS_CC1_SUBTARGET_SPEC \
> diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h
> index 8cd67f75b09..b0f39ee238e 100644
> --- a/gcc/config/frv/frv.h
> +++ b/gcc/config/frv/frv.h
> @@ -115,7 +115,7 @@
>  
> Do not define this macro if it does not need to do anything.  */
>  /* For ABI compliance, we need to put bss data into the normal data section. 
>  */
> -#define CC1_SPEC "%{G*}"
> +#define CC1_SPEC "%{G*}" SUBTARGET_CC1_SPEC
>  
>  #undef   LINK_SPEC
>  #define LINK_SPEC "\
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index f16df633e84..f1ceb6b2557 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -614,7 +614,7 @@ extern const char *host_detect_local_cpu (int argc, const 
> char **argv);
>  #define TARGET_D_HAS_STDCALL_CONVENTION ix86_d_has_stdcall_convention
>  
>  #ifndef CC1_SPEC
> -#define CC1_SPEC "%(cc1_cpu) "
> +#define CC1_SPEC "%(cc1_cpu) " SUBTARGET_CC1_SPEC
>  #endif
>  
>  /* This macro defines names of additional specifications to put in the
> diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
> index bd0ef35e9a4..0e11cef0edc 100644
> --- a/gcc/config/ia64/ia64.h
> +++ b/gcc/config/ia64/ia64.h
> @@ -51,7 +51,7 @@ do {\
>{ "asm_extra", 

[PATCH] btf: do not use the CHAR `encoding' bit for BTF

2022-07-22 Thread Jose E. Marchesi via Gcc-patches


Contrary to CTF and our previous expectations, as per [1], turns out
that in BTF:

1) The `encoding' field in integer types shall not be treated as a
   bitmap, but as an enumerated, i.e. these bits are exclusive to each
   other.

2) The CHAR bit in `encoding' shall _not_ be set when emitting types
   for char nor `unsigned char'.

Consequently this patch clears the CHAR bit before emitting the
variable part of BTF integral types.  It also updates the testsuite
accordingly, expanding it to check for BOOL bits.

[1] https://lore.kernel.org/bpf/a73586ad-f2dc-0401-1eba-2004357b7...@fb.com/T/#t

gcc/ChangeLog:

* btfout.cc (output_asm_btf_vlen_bytes): Do not use the CHAR
encoding bit in BTF.

gcc/testsuite/ChangeLog:

* gcc.dg/debug/btf/btf-int-1.c: Do not check for char bits in
bti_encoding and check for bool bits.
---
 gcc/btfout.cc  |  4 
 gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c | 18 +++---
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 31af50521da..576f73d47cf 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -914,6 +914,10 @@ output_asm_btf_vlen_bytes (ctf_container_ref ctfc, 
ctf_dtdef_ref dtd)
   if (dtd->dtd_data.ctti_size < 1)
break;
 
+  /* In BTF the CHAR `encoding' seems to not be used, so clear it
+ here.  */
+  dtd->dtd_u.dtu_enc.cte_format &= ~BTF_INT_CHAR;
+
   encoding = BTF_INT_DATA (dtd->dtd_u.dtu_enc.cte_format,
   dtd->dtd_u.dtu_enc.cte_offset,
   dtd->dtd_u.dtu_enc.cte_bits);
diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c
index 2381decd6ff..87d9758e9cb 100644
--- a/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c
+++ b/gcc/testsuite/gcc.dg/debug/btf/btf-int-1.c
@@ -4,7 +4,8 @@
| 0 | encoding | offset | 00 | bits |
encoding:
  signed  1 << 24
- char2 << 24
+ char2 << 24  (not used)
+ bool4 << 24
 
All offsets in this test should be 0.
This test does _not_ check number of bits, as it may vary between targets.
@@ -13,13 +14,14 @@
 /* { dg-do compile } */
 /* { dg-options "-O0 -gbtf -dA" } */
 
-/* Check for 8 BTF_KIND_INT types.  */
-/* { dg-final { scan-assembler-times "\[\t \]0x100\[\t 
\]+\[^\n\]*btt_info" 8 } } */
+/* Check for 9 BTF_KIND_INT types.  */
+/* { dg-final { scan-assembler-times "\[\t \]0x100\[\t 
\]+\[^\n\]*btt_info" 9 } } */
 
-/* Check the signed/char flags, but not bit size. */
-/* { dg-final { scan-assembler-times "\[\t \]0x1..\[\t 
\]+\[^\n\]*bti_encoding" 3 } } */
-/* { dg-final { scan-assembler-times "\[\t \]0x2..\[\t 
\]+\[^\n\]*bti_encoding" 1 } } */
-/* { dg-final { scan-assembler-times "\[\t \]0x3..\[\t 
\]+\[^\n\]*bti_encoding" 1 } } */
+/* Check the signed flags, but not bit size. */
+/* { dg-final { scan-assembler-times "\[\t \]0x1..\[\t 
\]+\[^\n\]*bti_encoding" 4 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x..\[\t \]+\[^\n\]*bti_encoding" 
3 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x.\[\t \]+\[^\n\]*bti_encoding" 
1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x4..\[\t 
\]+\[^\n\]*bti_encoding" 1 } } */
 
 /* Check that there is a string entry for each type name.  */
 /* { dg-final { scan-assembler-times "ascii \"unsigned char.0\"\[\t 
\]+\[^\n\]*btf_string" 1 } } */
@@ -42,3 +44,5 @@ signed int f = -66;
 
 unsigned long int g = 77;
 signed long int h = 88;
+
+_Bool x = 1;
-- 
2.11.0



Re: kernel sparse annotations vs. compiler attributes and debug_annotate_{type,decl} WAS: Re: [PATCH 0/9] Add debug_annotate attributes

2022-07-15 Thread Jose E. Marchesi via Gcc-patches


> On 7/14/22 8:09 AM, Jose E. Marchesi wrote:
>> Hi Yonghong.
>> 
>>> On 7/7/22 1:24 PM, Jose E. Marchesi wrote:
 Hi Yonghong.

> On 6/21/22 9:12 AM, Jose E. Marchesi wrote:
>>
>>> On 6/17/22 10:18 AM, Jose E. Marchesi wrote:
 Hi Yonghong.

> On 6/15/22 1:57 PM, David Faust wrote:
>>
>> On 6/14/22 22:53, Yonghong Song wrote:
>>>
>>>
>>> On 6/7/22 2:43 PM, David Faust wrote:
 Hello,

 This patch series adds support for:

 - Two new C-language-level attributes that allow to associate (to 
 "annotate" or
 to "tag") particular declarations and types with arbitrary 
 strings. As
 explained below, this is intended to be used to, for 
 example, characterize
 certain pointer types.

 - The conveyance of that information in the DWARF output in the 
 form of a new
 DIE: DW_TAG_GNU_annotation.

 - The conveyance of that information in the BTF output in the form 
 of two new
 kinds of BTF objects: BTF_KIND_DECL_TAG and 
 BTF_KIND_TYPE_TAG.

 All of these facilities are being added to the eBPF ecosystem, and 
 support for
 them exists in some form in LLVM.

 Purpose
 ===

 1)  Addition of C-family language constructs (attributes) to 
 specify free-text
   tags on certain language elements, such as struct fields.

   The purpose of these annotations is to provide 
 additional information about
   types, variables, and function parameters of interest to 
 the kernel. A
   driving use case is to tag pointer types within the 
 linux kernel and eBPF
   programs with additional semantic information, such as 
 '__user' or '__rcu'.

   For example, consider the linux kernel function 
 do_execve with the
   following declaration:

 static int do_execve(struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp);

   Here, __user could be defined with these annotations to 
 record semantic
   information about the pointer parameters (e.g., they are 
 user-provided) in
   DWARF and BTF information. Other kernel facilites such 
 as the eBPF verifier
   can read the tags and make use of the information.

 2)  Conveying the tags in the generated DWARF debug info.

   The main motivation for emitting the tags in DWARF is 
 that the Linux kernel
   generates its BTF information via pahole, using DWARF as 
 a source:

   ++  BTF  BTF   +--+
   | pahole |---> vmlinux.btf --->| verifier |
   ++ +--+
   ^^
   ||
 DWARF |BTF |
   ||
vmlinux  +-+
module1.ko   | BPF program |
module2.ko   +-+
  ...

   This is because:

   a)  Unlike GCC, LLVM will only generate BTF for BPF 
 programs.

   b)  GCC can generate BTF for whatever target with -gbtf, 
 but there is no
   support for linking/deduplicating BTF in the linker.

   In the scenario above, the verifier needs access to the 
 pointer tags of
   both the kernel types/declarations (conveyed in the 
 DWARF and translated
   to BTF by pahole) and those of the BPF program 
 (available directly in BTF).

   Another motivation for having the tag information in 
 DWARF, unrelated to
   BPF 

Re: kernel sparse annotations vs. compiler attributes and debug_annotate_{type,decl} WAS: Re: [PATCH 0/9] Add debug_annotate attributes

2022-07-14 Thread Jose E. Marchesi via Gcc-patches


Hi Yonghong.

> On 7/7/22 1:24 PM, Jose E. Marchesi wrote:
>> Hi Yonghong.
>> 
>>> On 6/21/22 9:12 AM, Jose E. Marchesi wrote:

> On 6/17/22 10:18 AM, Jose E. Marchesi wrote:
>> Hi Yonghong.
>>
>>> On 6/15/22 1:57 PM, David Faust wrote:

 On 6/14/22 22:53, Yonghong Song wrote:
>
>
> On 6/7/22 2:43 PM, David Faust wrote:
>> Hello,
>>
>> This patch series adds support for:
>>
>> - Two new C-language-level attributes that allow to associate (to 
>> "annotate" or
>>to "tag") particular declarations and types with arbitrary 
>> strings. As
>>explained below, this is intended to be used to, for example, 
>> characterize
>>certain pointer types.
>>
>> - The conveyance of that information in the DWARF output in the form 
>> of a new
>>DIE: DW_TAG_GNU_annotation.
>>
>> - The conveyance of that information in the BTF output in the form 
>> of two new
>>kinds of BTF objects: BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG.
>>
>> All of these facilities are being added to the eBPF ecosystem, and 
>> support for
>> them exists in some form in LLVM.
>>
>> Purpose
>> ===
>>
>> 1)  Addition of C-family language constructs (attributes) to specify 
>> free-text
>>  tags on certain language elements, such as struct fields.
>>
>>  The purpose of these annotations is to provide additional 
>> information about
>>  types, variables, and function parameters of interest to 
>> the kernel. A
>>  driving use case is to tag pointer types within the linux 
>> kernel and eBPF
>>  programs with additional semantic information, such as 
>> '__user' or '__rcu'.
>>
>>  For example, consider the linux kernel function do_execve 
>> with the
>>  following declaration:
>>
>>static int do_execve(struct filename *filename,
>>   const char __user *const __user *__argv,
>>   const char __user *const __user *__envp);
>>
>>  Here, __user could be defined with these annotations to 
>> record semantic
>>  information about the pointer parameters (e.g., they are 
>> user-provided) in
>>  DWARF and BTF information. Other kernel facilites such as 
>> the eBPF verifier
>>  can read the tags and make use of the information.
>>
>> 2)  Conveying the tags in the generated DWARF debug info.
>>
>>  The main motivation for emitting the tags in DWARF is that 
>> the Linux kernel
>>  generates its BTF information via pahole, using DWARF as a 
>> source:
>>
>>  ++  BTF  BTF   +--+
>>  | pahole |---> vmlinux.btf --->| verifier |
>>  ++ +--+
>>  ^^
>>  ||
>>DWARF |BTF |
>>  ||
>>   vmlinux  +-+
>>   module1.ko   | BPF program |
>>   module2.ko   +-+
>> ...
>>
>>  This is because:
>>
>>  a)  Unlike GCC, LLVM will only generate BTF for BPF 
>> programs.
>>
>>  b)  GCC can generate BTF for whatever target with -gbtf, 
>> but there is no
>>  support for linking/deduplicating BTF in the linker.
>>
>>  In the scenario above, the verifier needs access to the 
>> pointer tags of
>>  both the kernel types/declarations (conveyed in the DWARF 
>> and translated
>>  to BTF by pahole) and those of the BPF program (available 
>> directly in BTF).
>>
>>  Another motivation for having the tag information in DWARF, 
>> unrelated to
>>  BPF and BTF, is that the drgn project (another DWARF 
>> consumer) also wants
>>  to benefit from these tags in order to differentiate 
>> between different
>>  kinds of pointers in the kernel.
>>
>> 3)  Conveying the tags in the generated BTF debug info.

[PATCH V2] btf: emit linkage information in BTF_KIND_FUNC entries

2022-07-12 Thread Jose E. Marchesi via Gcc-patches


The kernel bpftool expects BTF_KIND_FUNC entries in BTF to include an
annotation reflecting the linkage of functions (static, global).  For
whatever reason they abuse the `vlen' field of the BTF_KIND_FUNC entry
instead of adding a variable-part to the record like it is done with
other entry kinds.

This patch makes GCC include this linkage info in BTF_KIND_FUNC
entries.

Tested in bpf-unknown-none target.

gcc/ChangeLog:

PR debug/106263
* ctfc.h (struct ctf_dtdef): Add field linkage.
* ctfc.cc (ctf_add_function): Set ctti_linkage.
* dwarf2ctf.cc (gen_ctf_function_type): Pass a linkage for
function types and subprograms.
* btfout.cc (btf_asm_func_type): Emit linkage information for the
function.
(btf_dtd_emit_preprocess_cb): Propagate the linkage information
for functions.

gcc/testsuite/ChangeLog:

PR debug/106263
* gcc.dg/debug/btf/btf-function-4.c: New test.
* gcc.dg/debug/btf/btf-function-5.c: Likewise.
---
 gcc/btfout.cc   |  6 +-
 gcc/ctfc.cc |  3 ++-
 gcc/ctfc.h  |  3 ++-
 gcc/dwarf2ctf.cc|  4 +++-
 gcc/testsuite/gcc.dg/debug/btf/btf-function-4.c | 14 ++
 gcc/testsuite/gcc.dg/debug/btf/btf-function-5.c | 14 ++
 6 files changed, 40 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-function-4.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-function-5.c

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 31af50521da..594cba84910 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -463,6 +463,7 @@ btf_dtd_emit_preprocess_cb (ctf_container_ref ctfc, 
ctf_dtdef_ref dtd)
   ctf_dtdef_ref func_dtd = ggc_cleared_alloc<ctf_dtdef_t> ();
   func_dtd->dtd_data = dtd->dtd_data;
   func_dtd->dtd_data.ctti_type = dtd->dtd_type;
+  func_dtd->linkage = dtd->linkage;
 
   vec_safe_push (funcs, func_dtd);
   num_types_created++;
@@ -740,7 +741,10 @@ static void
 btf_asm_func_type (ctf_dtdef_ref dtd)
 {
   dw2_asm_output_data (4, dtd->dtd_data.ctti_name, "btt_name");
-  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0, 0), "btt_info");
+  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0,
+ dtd->linkage),
+   "btt_info: kind=%u, kflag=%u, linkage=%u",
+   BTF_KIND_FUNC, 0, dtd->linkage);
   dw2_asm_output_data (4, get_btf_id (dtd->dtd_data.ctti_type), "btt_type");
 }
 
diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc
index f24e7bff948..9773358a475 100644
--- a/gcc/ctfc.cc
+++ b/gcc/ctfc.cc
@@ -777,7 +777,7 @@ ctf_add_function_arg (ctf_container_ref ctfc, dw_die_ref 
func,
 ctf_id_t
 ctf_add_function (ctf_container_ref ctfc, uint32_t flag, const char * name,
  const ctf_funcinfo_t * ctc, dw_die_ref die,
- bool from_global_func)
+ bool from_global_func, int linkage)
 {
   ctf_dtdef_ref dtd;
   ctf_id_t type;
@@ -791,6 +791,7 @@ ctf_add_function (ctf_container_ref ctfc, uint32_t flag, 
const char * name,
   type = ctf_add_generic (ctfc, flag, name, &dtd, die);
 
   dtd->from_global_func = from_global_func;
+  dtd->linkage = linkage;
   dtd->dtd_data.ctti_info = CTF_TYPE_INFO (CTF_K_FUNCTION, flag, vlen);
   /* Caller must make sure CTF types for ctc->ctc_return are already added.  */
   dtd->dtd_data.ctti_type = (uint32_t) ctc->ctc_return;
diff --git a/gcc/ctfc.h b/gcc/ctfc.h
index 001e544ef08..bcf3a43ae1b 100644
--- a/gcc/ctfc.h
+++ b/gcc/ctfc.h
@@ -161,6 +161,7 @@ struct GTY ((for_user)) ctf_dtdef
   ctf_itype_t dtd_data;  /* Type node.  */
   bool from_global_func; /* Whether this type was added from a global
function.  */
+  uint32_t linkage;   /* Used in function types.  0=local, 1=global.  */
   union GTY ((desc ("ctf_dtu_d_union_selector (&%1)")))
   {
 /* struct, union, or enum.  */
@@ -423,7 +424,7 @@ extern ctf_id_t ctf_add_forward (ctf_container_ref, 
uint32_t, const char *,
 extern ctf_id_t ctf_add_typedef (ctf_container_ref, uint32_t, const char *,
 ctf_id_t, dw_die_ref);
 extern ctf_id_t ctf_add_function (ctf_container_ref, uint32_t, const char *,
- const ctf_funcinfo_t *, dw_die_ref, bool);
+ const ctf_funcinfo_t *, dw_die_ref, bool, 
int);
 extern ctf_id_t ctf_add_sou (ctf_container_ref, uint32_t, const char *,
 uint32_t, size_t, dw_die_ref);
 
diff --git a/gcc/dwarf2ctf.cc b/gcc/dwarf2ctf.cc
index a6329ab6ee4..39714c2 100644
--- a/gcc/dwarf2ctf.cc
+++ b/gcc/dwarf2ctf.cc
@@ -644,6 +644,7 @@ gen_ctf_function_type (ctf_container_ref ctfc, dw_die_ref 
function,
 
   ctf_funcinfo_t func_info;
   uint32_t num_args = 0;
+  int linkage = get_AT_flag (function, DW_AT_external);
 
   ctf_id_t 

Re: [PATCH] btf: emit linkage information in BTF_KIND_FUNC entries

2022-07-12 Thread Jose E. Marchesi via Gcc-patches


> On 7/8/22 11:30 AM, Jose E. Marchesi via Gcc-patches wrote:
>> 
>> The kernel bpftool expects BTF_KIND_FUNC entries in BTF to include an
>> annotation reflecting the linkage of functions (static, global).  For
>> whatever reason they (ab)use the `vlen' field of the BTF_KIND_FUNC entry
>> instead of adding a variable-part to the record like it is done with
>> other entry kinds.
>> 
>
> For BTF Variables, we have the linkage information in the output
> section as "btv_linkage".  To propagate that information from DWARF to
> BTF, we have the dvd_visibility in struct ctf_dvdef (in ctfc.h). Now
> that the linkage information is needed for the BTF_KIND_FUNC entries,
> what do you think about adding something like dtd_visibility to
> ctf_dtdef?
>
> Updating the BTF format documentation will be useful
> https://www.kernel.org/doc/Documentation/bpf/btf.rst. Let's see what
> can be done for that...
>
> Also, adding some testcases with the current patch will be great.
>
> I have created PR debug/106263 "BTF_KIND_FUNC type does not encode
> linkage" to track this.

Sending V2 with the requested changes.


[PATCH] btf: emit linkage information in BTF_KIND_FUNC entries

2022-07-08 Thread Jose E. Marchesi via Gcc-patches



The kernel bpftool expects BTF_KIND_FUNC entries in BTF to include an
annotation reflecting the linkage of functions (static, global).  For
whatever reason they (ab)use the `vlen' field of the BTF_KIND_FUNC entry
instead of adding a variable-part to the record like it is done with
other entry kinds.

This patch makes GCC include this linkage info in BTF_KIND_FUNC
entries.

Tested in bpf-unknown-none target.

gcc/ChangeLog:

* ctfc.h (struct ctf_itype): Add field ctti_linkage.
* ctfc.cc (ctf_add_function): Set ctti_linkage.
* dwarf2ctf.cc (gen_ctf_function_type): Pass a linkage for
function types and subprograms.
* btfout.cc (btf_asm_func_type): Emit linkage information for the
function.
---
 gcc/btfout.cc| 3 ++-
 gcc/ctfc.cc  | 3 ++-
 gcc/ctfc.h   | 3 ++-
 gcc/dwarf2ctf.cc | 4 +++-
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 31af50521da..417d87cf519 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -740,7 +740,8 @@ static void
 btf_asm_func_type (ctf_dtdef_ref dtd)
 {
   dw2_asm_output_data (4, dtd->dtd_data.ctti_name, "btt_name");
-  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0, 0), "btt_info");
+  dw2_asm_output_data (4, BTF_TYPE_INFO (BTF_KIND_FUNC, 0,
+ dtd->dtd_data.ctti_linkage), "btt_info");
   dw2_asm_output_data (4, get_btf_id (dtd->dtd_data.ctti_type), "btt_type");
 }
 
diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc
index f24e7bff948..ad7f8bb8e86 100644
--- a/gcc/ctfc.cc
+++ b/gcc/ctfc.cc
@@ -777,7 +777,7 @@ ctf_add_function_arg (ctf_container_ref ctfc, dw_die_ref 
func,
 ctf_id_t
 ctf_add_function (ctf_container_ref ctfc, uint32_t flag, const char * name,
  const ctf_funcinfo_t * ctc, dw_die_ref die,
- bool from_global_func)
+ bool from_global_func, int linkage)
 {
   ctf_dtdef_ref dtd;
   ctf_id_t type;
@@ -792,6 +792,7 @@ ctf_add_function (ctf_container_ref ctfc, uint32_t flag, 
const char * name,
 
   dtd->from_global_func = from_global_func;
   dtd->dtd_data.ctti_info = CTF_TYPE_INFO (CTF_K_FUNCTION, flag, vlen);
+  dtd->dtd_data.ctti_linkage = linkage;
   /* Caller must make sure CTF types for ctc->ctc_return are already added.  */
   dtd->dtd_data.ctti_type = (uint32_t) ctc->ctc_return;
   /* Caller must make sure CTF types for function arguments are already added
diff --git a/gcc/ctfc.h b/gcc/ctfc.h
index 001e544ef08..273997a2302 100644
--- a/gcc/ctfc.h
+++ b/gcc/ctfc.h
@@ -116,6 +116,7 @@ typedef struct GTY (()) ctf_itype
   } _u;
   uint32_t ctti_lsizehi;   /* High 32 bits of type size in bytes.  */
   uint32_t ctti_lsizelo;   /* Low 32 bits of type size in bytes.  */
+  uint32_t ctti_linkage;   /* Linkage info for function types.  */
 } ctf_itype_t;
 
 #define ctti_size _u._size
@@ -423,7 +424,7 @@ extern ctf_id_t ctf_add_forward (ctf_container_ref, 
uint32_t, const char *,
 extern ctf_id_t ctf_add_typedef (ctf_container_ref, uint32_t, const char *,
 ctf_id_t, dw_die_ref);
 extern ctf_id_t ctf_add_function (ctf_container_ref, uint32_t, const char *,
- const ctf_funcinfo_t *, dw_die_ref, bool);
+ const ctf_funcinfo_t *, dw_die_ref, bool, 
int);
 extern ctf_id_t ctf_add_sou (ctf_container_ref, uint32_t, const char *,
 uint32_t, size_t, dw_die_ref);
 
diff --git a/gcc/dwarf2ctf.cc b/gcc/dwarf2ctf.cc
index a6329ab6ee4..39714c2 100644
--- a/gcc/dwarf2ctf.cc
+++ b/gcc/dwarf2ctf.cc
@@ -644,6 +644,7 @@ gen_ctf_function_type (ctf_container_ref ctfc, dw_die_ref 
function,
 
   ctf_funcinfo_t func_info;
   uint32_t num_args = 0;
+  int linkage = get_AT_flag (function, DW_AT_external);
 
   ctf_id_t return_type_id;
   ctf_id_t function_type_id;
@@ -687,7 +688,8 @@ gen_ctf_function_type (ctf_container_ref ctfc, dw_die_ref 
function,
   function_name,
   (const ctf_funcinfo_t *)&func_info,
   function,
-  from_global_func);
+  from_global_func,
+   linkage);
 
   /* Second pass on formals: generate the CTF types corresponding to
  them and add them as CTF function args.  */
-- 
2.11.0



  1   2   3   >