Re: [PATCH] bpf: add inline memset expansion

2024-03-04 Thread Jose E. Marchesi


Hi David.
Thanks for the patch.
OK.

> Similar to memmove and memcpy, the BPF backend cannot fall back on a
> library call to implement __builtin_memset, and should always expand
> calls to it inline if possible.
>
> This patch implements simple inline expansion of memset in the BPF
> backend in a verifier-friendly way. Similar to memcpy and memmove, the
> size must be an integer constant, as is also required by clang.
>
> Tested for bpf-unknown-none target on x86_64-linux-gnu host.
> Also testetd against kernel BPF verifier by compiling and loading a
> test program using the inline memset expansion.
>
> gcc/
>   * config/bpf/bpf-protos.h (bpf_expand_setmem): New prototype.
>   * config/bpf/bpf.cc (bpf_expand_setmem): New.
>   * config/bpf/bpf.md (setmemdi): New define_expand.
>
> gcc/testsuite/
>   * gcc.target/bpf/memset-1.c: New test.
> ---
>  gcc/config/bpf/bpf-protos.h |  1 +
>  gcc/config/bpf/bpf.cc   | 66 +
>  gcc/config/bpf/bpf.md   | 17 +++
>  gcc/testsuite/gcc.target/bpf/memset-1.c | 39 +++
>  4 files changed, 123 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/memset-1.c
>
> diff --git a/gcc/config/bpf/bpf-protos.h b/gcc/config/bpf/bpf-protos.h
> index 366acb87ae4..ac0c2f4038f 100644
> --- a/gcc/config/bpf/bpf-protos.h
> +++ b/gcc/config/bpf/bpf-protos.h
> @@ -36,5 +36,6 @@ class gimple_opt_pass;
>  gimple_opt_pass *make_pass_lower_bpf_core (gcc::context *ctxt);
>  
>  bool bpf_expand_cpymem (rtx *, bool);
> +bool bpf_expand_setmem (rtx *);
>  
>  #endif /* ! GCC_BPF_PROTOS_H */
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index 22b0cf2dc46..0e33f4347ba 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -1309,6 +1309,72 @@ bpf_expand_cpymem (rtx *operands, bool is_move)
>return true;
>  }
>  
> +/* Expand setmem, as from __builtin_memset.
> +   OPERANDS are the same as the setmem pattern.
> +   Return true if the expansion was successful, false otherwise.  */
> +
> +bool
> +bpf_expand_setmem (rtx *operands)
> +{
> +  /* Size must be constant for this expansion to work.  */
> +  if (!CONST_INT_P (operands[1]))
> +{
> +  if (flag_building_libgcc)
> + warning (0, "could not inline call to %<__builtin_memset%>: "
> +  "size must be constant");
> +  else
> + error ("could not inline call to %<__builtin_memset%>: "
> +"size must be constant");
> +  return false;
> +}
> +
> +  /* Alignment is a CONST_INT.  */
> +  gcc_assert (CONST_INT_P (operands[3]));
> +
> +  rtx dst = operands[0];
> +  rtx size = operands[1];
> +  rtx val = operands[2];
> +  unsigned HOST_WIDE_INT size_bytes = UINTVAL (size);
> +  unsigned align = UINTVAL (operands[3]);
> +  enum machine_mode mode;
> +  switch (align)
> +{
> +case 1: mode = QImode; break;
> +case 2: mode = HImode; break;
> +case 4: mode = SImode; break;
> +case 8: mode = DImode; break;
> +default:
> +  gcc_unreachable ();
> +}
> +
> +  unsigned iters = size_bytes >> ceil_log2 (align);
> +  unsigned remainder = size_bytes & (align - 1);
> +  unsigned inc = GET_MODE_SIZE (mode);
> +  unsigned offset = 0;
> +
> +  for (unsigned int i = 0; i < iters; i++)
> +{
> +  emit_move_insn (adjust_address (dst, mode, offset), val);
> +  offset += inc;
> +}
> +  if (remainder & 4)
> +{
> +  emit_move_insn (adjust_address (dst, SImode, offset), val);
> +  offset += 4;
> +  remainder -= 4;
> +}
> +  if (remainder & 2)
> +{
> +  emit_move_insn (adjust_address (dst, HImode, offset), val);
> +  offset += 2;
> +  remainder -= 2;
> +}
> +  if (remainder & 1)
> +emit_move_insn (adjust_address (dst, QImode, offset), val);
> +
> +  return true;
> +}
> +
>  /* Finally, build the GCC target.  */
>  
>  struct gcc_target targetm = TARGET_INITIALIZER;
> diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
> index ca677bc6b50..ea688aadf91 100644
> --- a/gcc/config/bpf/bpf.md
> +++ b/gcc/config/bpf/bpf.md
> @@ -663,4 +663,21 @@ (define_expand "movmemdi"
>FAIL;
>  })
>  
> +;; memset
> +;; 0 is dst
> +;; 1 is length
> +;; 2 is value
> +;; 3 is alignment
> +(define_expand "setmemdi"
> +  [(set (match_operand:BLK 0 "memory_operand")
> + (match_operand:QI  2 "nonmemory_operand"))
> +   (use (match_operand:DI  1 "general_operand"))
> +   (match_operand 3 "immediate_operand")]
> + ""
> + {
> +  if (bpf_expand_setmem (operands))
> +DONE;
> +  FAIL;
> +})
> +
>  (include "atomic.md")
> diff --git a/gcc/testsuite/gcc.target/bpf/memset-1.c 
> b/gcc/testsuite/gcc.target/bpf/memset-1.c
> new file mode 100644
> index 000..9e9f8eff028
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/memset-1.c
> @@ -0,0 +1,39 @@
> +/* Ensure memset is expanded inline rather than emitting a libcall.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +struct context {
> + 

[PATCH] bpf: add inline memset expansion

2024-03-04 Thread David Faust
Similar to memmove and memcpy, the BPF backend cannot fall back on a
library call to implement __builtin_memset, and should always expand
calls to it inline if possible.

This patch implements simple inline expansion of memset in the BPF
backend in a verifier-friendly way. Similar to memcpy and memmove, the
size must be an integer constant, as is also required by clang.

Tested for bpf-unknown-none target on x86_64-linux-gnu host.
Also testetd against kernel BPF verifier by compiling and loading a
test program using the inline memset expansion.

gcc/
* config/bpf/bpf-protos.h (bpf_expand_setmem): New prototype.
* config/bpf/bpf.cc (bpf_expand_setmem): New.
* config/bpf/bpf.md (setmemdi): New define_expand.

gcc/testsuite/
* gcc.target/bpf/memset-1.c: New test.
---
 gcc/config/bpf/bpf-protos.h |  1 +
 gcc/config/bpf/bpf.cc   | 66 +
 gcc/config/bpf/bpf.md   | 17 +++
 gcc/testsuite/gcc.target/bpf/memset-1.c | 39 +++
 4 files changed, 123 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/bpf/memset-1.c

diff --git a/gcc/config/bpf/bpf-protos.h b/gcc/config/bpf/bpf-protos.h
index 366acb87ae4..ac0c2f4038f 100644
--- a/gcc/config/bpf/bpf-protos.h
+++ b/gcc/config/bpf/bpf-protos.h
@@ -36,5 +36,6 @@ class gimple_opt_pass;
 gimple_opt_pass *make_pass_lower_bpf_core (gcc::context *ctxt);
 
 bool bpf_expand_cpymem (rtx *, bool);
+bool bpf_expand_setmem (rtx *);
 
 #endif /* ! GCC_BPF_PROTOS_H */
diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 22b0cf2dc46..0e33f4347ba 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -1309,6 +1309,72 @@ bpf_expand_cpymem (rtx *operands, bool is_move)
   return true;
 }
 
+/* Expand setmem, as from __builtin_memset.
+   OPERANDS are the same as the setmem pattern.
+   Return true if the expansion was successful, false otherwise.  */
+
+bool
+bpf_expand_setmem (rtx *operands)
+{
+  /* Size must be constant for this expansion to work.  */
+  if (!CONST_INT_P (operands[1]))
+{
+  if (flag_building_libgcc)
+   warning (0, "could not inline call to %<__builtin_memset%>: "
+"size must be constant");
+  else
+   error ("could not inline call to %<__builtin_memset%>: "
+  "size must be constant");
+  return false;
+}
+
+  /* Alignment is a CONST_INT.  */
+  gcc_assert (CONST_INT_P (operands[3]));
+
+  rtx dst = operands[0];
+  rtx size = operands[1];
+  rtx val = operands[2];
+  unsigned HOST_WIDE_INT size_bytes = UINTVAL (size);
+  unsigned align = UINTVAL (operands[3]);
+  enum machine_mode mode;
+  switch (align)
+{
+case 1: mode = QImode; break;
+case 2: mode = HImode; break;
+case 4: mode = SImode; break;
+case 8: mode = DImode; break;
+default:
+  gcc_unreachable ();
+}
+
+  unsigned iters = size_bytes >> ceil_log2 (align);
+  unsigned remainder = size_bytes & (align - 1);
+  unsigned inc = GET_MODE_SIZE (mode);
+  unsigned offset = 0;
+
+  for (unsigned int i = 0; i < iters; i++)
+{
+  emit_move_insn (adjust_address (dst, mode, offset), val);
+  offset += inc;
+}
+  if (remainder & 4)
+{
+  emit_move_insn (adjust_address (dst, SImode, offset), val);
+  offset += 4;
+  remainder -= 4;
+}
+  if (remainder & 2)
+{
+  emit_move_insn (adjust_address (dst, HImode, offset), val);
+  offset += 2;
+  remainder -= 2;
+}
+  if (remainder & 1)
+emit_move_insn (adjust_address (dst, QImode, offset), val);
+
+  return true;
+}
+
 /* Finally, build the GCC target.  */
 
 struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
index ca677bc6b50..ea688aadf91 100644
--- a/gcc/config/bpf/bpf.md
+++ b/gcc/config/bpf/bpf.md
@@ -663,4 +663,21 @@ (define_expand "movmemdi"
   FAIL;
 })
 
+;; memset
+;; 0 is dst
+;; 1 is length
+;; 2 is value
+;; 3 is alignment
+(define_expand "setmemdi"
+  [(set (match_operand:BLK 0 "memory_operand")
+   (match_operand:QI  2 "nonmemory_operand"))
+   (use (match_operand:DI  1 "general_operand"))
+   (match_operand 3 "immediate_operand")]
+ ""
+ {
+  if (bpf_expand_setmem (operands))
+DONE;
+  FAIL;
+})
+
 (include "atomic.md")
diff --git a/gcc/testsuite/gcc.target/bpf/memset-1.c 
b/gcc/testsuite/gcc.target/bpf/memset-1.c
new file mode 100644
index 000..9e9f8eff028
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/memset-1.c
@@ -0,0 +1,39 @@
+/* Ensure memset is expanded inline rather than emitting a libcall.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct context {
+ unsigned int data;
+ unsigned int data_end;
+ unsigned int data_meta;
+ unsigned int ingress;
+ unsigned int queue_index;
+ unsigned int egress;
+};
+
+void
+set_small (struct context *ctx)
+{
+  void *data = (void *)(long)ctx->data;
+  char *dest = data;
+  __builtin_memset (dest + 4, 0, sizeof (struct context) - 4);
+}
+
+void