[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-26 Thread Yonghong Song via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6c412b6c6faa: [BPF] Add a few new insns under cpu=v4 
(authored by yonghong-song).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/lib/Basic/Targets/BPF.h
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
  llvm/lib/Target/BPF/BPFISelLowering.cpp
  llvm/lib/Target/BPF/BPFISelLowering.h
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIPeephole.cpp
  llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
  llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
  llvm/test/CodeGen/BPF/bswap.ll
  llvm/test/CodeGen/BPF/gotol.ll
  llvm/test/CodeGen/BPF/ldsx.ll
  llvm/test/CodeGen/BPF/movsx.ll
  llvm/test/CodeGen/BPF/sdiv_smod.ll

Index: llvm/test/CodeGen/BPF/sdiv_smod.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/sdiv_smod.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+;  int foo(int a, int b, int c) {
+;return a/b + a%c;
+;  }
+;  long bar(long a, long b, long c) {
+;   return a/b + a%c;
+; }
+; Compilation flags:
+;   clang -target bpf -O2 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @foo(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %c
+  %add = add nsw i32 %rem, %div
+  ret i32 %add
+}
+
+; CHECK:   w0 = w1
+; CHECK-NEXT:  w1 s/= w2   # encoding: [0x3c,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT:  w0 s%= w3   # encoding: [0x9c,0x30,0x01,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @bar(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+entry:
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %c
+  %add = add nsw i64 %rem, %div
+  ret i64 %add
+}
+; CHECK:   r0 = r1
+; CHECK-NEXT:  r1 s/= r2   # encoding: [0x3f,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT:  r0 s%= r3   # encoding: [0x9f,0x30,0x01,0x00,0x00,0x00,0x00,0x00]
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git c102025a4299e74767cdb4dfba8abbf6cbad820b)"}
Index: llvm/test/CodeGen/BPF/movsx.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/movsx.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+;   short f1(int a) {
+; return (char)a;
+;   }
+;   int f2(int a) {
+; return (short)a;
+;   }
+;   long f3(int a) {
+; return (char)a;
+;   }
+;   long f4(int a) {
+; return (short)a;
+;   }
+;   long f5(int a) {
+; return a;
+;   }
+;   long f6(long a) {
+; return (int)a;
+;   }
+; Compilation flags:
+;   clang -target bpf -O2 -mcpu=v4 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i16 @f1(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+  %conv = trunc i32 %a to i8
+  %conv1 = sext i8 %conv to i16
+  ret i16 %conv1
+}
+; CHECK:  w0 = (s8)w1 # encoding: [0xbc,0x10,0x08,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @f2(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+  %sext = shl i32 %a, 16
+  %conv1 = ashr exact i32 %sext, 16
+  ret i32 %conv1
+}
+; CHECK:  w0 = (s16)w1# encoding: [0xbc,0x10,0x10,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @f3(i32 noundef %a) 

[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-25 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 544055.
yonghong-song added a comment.

- Add more tests in assembler-disassembler-v4.s and gotol.ll.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/lib/Basic/Targets/BPF.h
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
  llvm/lib/Target/BPF/BPFISelLowering.cpp
  llvm/lib/Target/BPF/BPFISelLowering.h
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIPeephole.cpp
  llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
  llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
  llvm/test/CodeGen/BPF/bswap.ll
  llvm/test/CodeGen/BPF/gotol.ll
  llvm/test/CodeGen/BPF/ldsx.ll
  llvm/test/CodeGen/BPF/movsx.ll
  llvm/test/CodeGen/BPF/sdiv_smod.ll

Index: llvm/test/CodeGen/BPF/sdiv_smod.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/sdiv_smod.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+;  int foo(int a, int b, int c) {
+;return a/b + a%c;
+;  }
+;  long bar(long a, long b, long c) {
+;   return a/b + a%c;
+; }
+; Compilation flags:
+;   clang -target bpf -O2 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @foo(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %c
+  %add = add nsw i32 %rem, %div
+  ret i32 %add
+}
+
+; CHECK:   w0 = w1
+; CHECK-NEXT:  w1 s/= w2   # encoding: [0x3c,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT:  w0 s%= w3   # encoding: [0x9c,0x30,0x01,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @bar(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+entry:
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %c
+  %add = add nsw i64 %rem, %div
+  ret i64 %add
+}
+; CHECK:   r0 = r1
+; CHECK-NEXT:  r1 s/= r2   # encoding: [0x3f,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT:  r0 s%= r3   # encoding: [0x9f,0x30,0x01,0x00,0x00,0x00,0x00,0x00]
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git c102025a4299e74767cdb4dfba8abbf6cbad820b)"}
Index: llvm/test/CodeGen/BPF/movsx.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/movsx.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+;   short f1(int a) {
+; return (char)a;
+;   }
+;   int f2(int a) {
+; return (short)a;
+;   }
+;   long f3(int a) {
+; return (char)a;
+;   }
+;   long f4(int a) {
+; return (short)a;
+;   }
+;   long f5(int a) {
+; return a;
+;   }
+;   long f6(long a) {
+; return (int)a;
+;   }
+; Compilation flags:
+;   clang -target bpf -O2 -mcpu=v4 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i16 @f1(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+  %conv = trunc i32 %a to i8
+  %conv1 = sext i8 %conv to i16
+  ret i16 %conv1
+}
+; CHECK:  w0 = (s8)w1 # encoding: [0xbc,0x10,0x08,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @f2(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+  %sext = shl i32 %a, 16
+  %conv1 = ashr exact i32 %sext, 16
+  ret i32 %conv1
+}
+; CHECK:  w0 = (s16)w1# encoding: [0xbc,0x10,0x10,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @f3(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+  %conv = zext i32 %a to i64
+  %sext = shl i64 %conv, 56
+  

[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-25 Thread Eduard Zingerman via Phabricator via cfe-commits
eddyz87 accepted this revision.
eddyz87 added a comment.

Hi Yonghong,

Looks good to me, thanks!
Before landing this, could you please adjust tests a little bit more?

- Extend `assembler-disassembler-v4.s` with signed `div` and `mod`, e.g.:

  // CHECK: 3f 31 01 00 00 00 00 00 r1 s/= r3
  // CHECK: 9f 42 01 00 00 00 00 00 r2 s%= r4
  r1 s/= r3
  r2 s%= r4
  
  // CHECK: 3c 31 01 00 00 00 00 00 w1 s/= w3
  // CHECK: 9c 42 01 00 00 00 00 00 w2 s%= w4
  w1 s/= w3
  w2 s%= w4

- For `gotol` add a test case which tries each possibility in 
`BPFMIPreEmitPeephole::adjustBranch()`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-24 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 543709.
yonghong-song added a comment.

Three major changes in this patch:

- for ldsx insns, remove the 32bit ldsx insns (1-byte and 2-byte sign extension) 
since the ldsx insn is expected to sign-extend all the way up to 8 bytes, while a 
normal 32bit insn (e.g. BPF_ALU) is expected to zero out the top bits. Instead, do a 
ldbsx/ldhsx and then take the lower 4 bytes to extract the 32bit value. This also 
resolves one disasm issue reported by Eduard.
- for the movsx insn, for 32bit sign extension to 64bit, match both "sext_inreg 
GPR:$src, i32" (left and right shifting) and "sext GPR32:$src".
- Add an internal flag to control when to generate gotol insns in 
BPFMIPeephole.cpp. This permits a simpler test for gotol insns.

With the above changes, the following change is needed:

  diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c 
b/tools/testing/selftests/bpf/progs/verifier_movsx.c
  index 5ee7d004f8ba..e27bfa11c9b3 100644
  --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
  +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
  @@ -59,7 +59,7 @@ __naked void mov64sx_s32(void)
   {
  asm volatile (" \
  r0 = 0xfffe;\
  -   r0 = (s32)w0;   \
  +   r0 = (s32)r0;   \
  r0 >>= 1;   \
  exit;   \
   "  ::: __clobber_all);
  @@ -181,7 +181,7 @@ __naked void mov64sx_s32_range(void)
   {
  asm volatile (" \
  call %[bpf_get_prandom_u32];\
  -   r1 = (s32)w0;   \
  +   r1 = (s32)r0;   \
  /* r1 with s32 range */ \
  if r1 s> 0x7fff goto l0_%=; \
  if r1 s< -0x8000 goto l0_%=;\

in order to compile kernel cpu v4 support (patch series v3)

  https://lore.kernel.org/bpf/2023072103.99949-1-...@fb.com/

I will update the kernel side once we have resolved all llvm issues.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/lib/Basic/Targets/BPF.h
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
  llvm/lib/Target/BPF/BPFISelLowering.cpp
  llvm/lib/Target/BPF/BPFISelLowering.h
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIPeephole.cpp
  llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
  llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
  llvm/test/CodeGen/BPF/bswap.ll
  llvm/test/CodeGen/BPF/gotol.ll
  llvm/test/CodeGen/BPF/ldsx.ll
  llvm/test/CodeGen/BPF/movsx.ll
  llvm/test/CodeGen/BPF/sdiv_smod.ll

Index: llvm/test/CodeGen/BPF/sdiv_smod.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/sdiv_smod.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+;  int foo(int a, int b, int c) {
+;return a/b + a%c;
+;  }
+;  long bar(long a, long b, long c) {
+;   return a/b + a%c;
+; }
+; Compilation flags:
+;   clang -target bpf -O2 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @foo(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %c
+  %add = add nsw i32 %rem, %div
+  ret i32 %add
+}
+
+; CHECK:   w0 = w1
+; CHECK-NEXT:  w1 s/= w2   # encoding: [0x3c,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT:  w0 s%= w3   # encoding: [0x9c,0x30,0x01,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @bar(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+entry:
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %c
+  %add = add nsw i64 %rem, %div
+  ret i64 %add
+}
+; CHECK:   r0 = r1
+; CHECK-NEXT:  r1 s/= r2   # encoding: [0x3f,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT:  r0 s%= r3   # encoding: 

[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-24 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

> Could you please also add a few tests for `gotol`?

Will do!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-24 Thread Eduard Zingerman via Phabricator via cfe-commits
eddyz87 added a comment.

Hi Yonghong,

Thank you for the comments.
Could you please also add a few tests for `gotol`?
Sorry, I should have asked for those last week.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-24 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:379
+  "$dst = (s8)$src",
+  [(set GPR:$dst, (sra (shl GPR:$src, (i64 56)), (i64 
56)))]>;
+  def MOVSX_rr_16 : ALU_RR I think it is possible to avoid matching expansion pattern `(sra (shl 
> GPR:$src, (i64 56))` here, and instead turn off the expansion when `movsx` is 
> available.
> 
> I tried the change below and all BPF codegen tests are passing. Do I miss 
> something?
> 
> ---
> 
> ```
> diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp 
> b/llvm/lib/Target/BPF/BPFISelLowering.cpp
> index 9a7357d6ad04..5e84af009591 100644
> --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
> +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
> @@ -132,9 +132,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine 
> ,
>setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
>  
>setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
> -  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
> -  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
> -  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
> +  if (!STI.hasMovsx()) {
> +setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
> +setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
> +setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
> +  }
>  
>// Extended load operations for i1 types must be promoted
>for (MVT VT : MVT::integer_valuetypes()) {
> diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td 
> b/llvm/lib/Target/BPF/BPFInstrInfo.td
> index a1d532e60db2..29bec72aa92d 100644
> --- a/llvm/lib/Target/BPF/BPFInstrInfo.td
> +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
> @@ -376,11 +376,11 @@ let Predicates = [BPFHasMovsx] in {
>def MOVSX_rr_8 : ALU_RR(outs GPR:$dst), (ins GPR:$src),
>"$dst = (s8)$src",
> -  [(set GPR:$dst, (sra (shl GPR:$src, (i64 56)), (i64 
> 56)))]>;
> +  [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>;
>def MOVSX_rr_16 : ALU_RR(outs GPR:$dst), (ins GPR:$src),
>"$dst = (s16)$src",
> -  [(set GPR:$dst, (sra (shl GPR:$src, (i64 48)), (i64 
> 48)))]>;
> +  [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>;
>def MOVSX_rr_32 : ALU_RR(outs GPR:$dst), (ins GPR32:$src),
>"$dst = (s32)$src",
> @@ -388,11 +388,11 @@ let Predicates = [BPFHasMovsx] in {
>def MOVSX_rr_32_8 : ALU_RR(outs GPR32:$dst), (ins GPR32:$src),
>"$dst = (s8)$src",
> -  [(set GPR32:$dst, (sra (shl GPR32:$src, (i32 24)), 
> (i32 24)))]>;
> +  [(set GPR32:$dst, (sext_inreg GPR32:$src, i8))]>;
>def MOVSX_rr_32_16 : ALU_RR(outs GPR32:$dst), (ins GPR32:$src),
>"$dst = (s16)$src",
> -  [(set GPR32:$dst, (sra (shl GPR32:$src, (i32 16)), 
> (i32 16)))]>;
> +  [(set GPR32:$dst, (sext_inreg GPR32:$src, i16))]>;
>  }
>  }
>  ```
This indeed can simplify the code. I will incorporate your change into the 
patch. Thanks!



Comment at: llvm/lib/Target/BPF/BPFMIPeephole.cpp:321
+
+  std::map ReverseCondOpMap;
 

eddyz87 wrote:
> Is this map unused?
Right, it is not used. This is a leftover; I will remove it.



Comment at: llvm/lib/Target/BPF/BPFMIPeephole.cpp:412
+  int CurrNumInsns = 0;
+  std::map SoFarNumInsns;
+  std::map FollowThroughBB;

eddyz87 wrote:
> Nitpick: Fangrui suggested in my llvm-objdump revisions to use `DenseMap` in 
> most cases (as `std::map` allocates for each pair).
Will try to use DenseMap.



Comment at: llvm/test/CodeGen/BPF/movsx.ll:30
+}
+; CHECK: w0 = w1 # encoding: 
[0xbc,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+

eddyz87 wrote:
> This does not seem right, as it does not sign extend 8-bit argument to 16-bit 
> value.
This is probably due to ABI. For example,
```
$ cat t1.c
__attribute__((noinline)) short f1(char a) {
  return a * a;
}

int f2(int a) {
  return f1(a);
}


$ clang --target=bpf -O2 -mcpu=v4 -S t1.c

f1: # @f1
# %bb.0:# %entry
w0 = w1
w0 *= w0
exit
.Lfunc_end0:
.size   f1, .Lfunc_end0-f1
# -- End function
.globl  f2  # -- Begin function f2
.p2align3
.type   f2,@function
f2: # @f2
# %bb.0:# %entry
w1 = (s8)w1
call f1
w0 = (s16)w0
exit
```
You can see in function f2(), the sign-extension has been done properly. and 
that is 

[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-24 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

In D144829#4519036 , @eddyz87 wrote:

> I tried adding a test similar to `assemble-disassemble.ll`:
>
>   // RUN: llvm-mc -triple bpfel --mcpu=v4 --assemble --filetype=obj %s \
>   // RUN:   | llvm-objdump -d --mattr=+alu32 - \
>   // RUN:   | FileCheck %s
>   
>   // CHECK: d7 01 00 00 10 00 00 00   r1 = bswap16 r1
>   // CHECK: d7 02 00 00 20 00 00 00   r2 = bswap32 r2
>   // CHECK: d7 03 00 00 40 00 00 00   r3 = bswap64 r3
>   r1 = bswap16 r1
>   r2 = bswap32 r2
>   r3 = bswap64 r3
>   
>   // CHECK: 91 41 00 00 00 00 00 00   r1 = *(s8 *)(r4 + 0x0)
>   // CHECK: 89 52 04 00 00 00 00 00   r2 = *(s16 *)(r5 + 0x4)
>   // CHECK: 81 63 08 00 00 00 00 00   r3 = *(s32 *)(r6 + 0x8)
>   r1 = *(s8 *)(r4 + 0)
>   r2 = *(s16 *)(r5 + 4)
>   r3 = *(s32 *)(r6 + 8)
>   
>   // CHECK: 91 41 00 00 00 00 00 00   w1 = *(s8 *)(r4 + 0x0)
>   // CHECK: 89 52 04 00 00 00 00 00   w2 = *(s16 *)(r5 + 0x4)
>   w1 = *(s8 *)(r4 + 0)
>   w2 = *(s16 *)(r5 + 4)
>   
>   // CHECK: bf 41 08 00 00 00 00 00   r1 = (s8)r4
>   // CHECK: bf 52 10 00 00 00 00 00   r2 = (s16)r5
>   // CHECK: bf 63 20 00 00 00 00 00   r3 = (s32)w6
>   r1 = (s8)r4
>   r2 = (s16)r5
>   r3 = (s32)w6
>   // Should this work as well: r3 = (s32)r6 ?
>   
>   // CHECK: bc 31 08 00 00 00 00 00   w1 = (s8)w3
>   // CHECK: bc 42 10 00 00 00 00 00   w2 = (s16)w4
>   w1 = (s8)w3
>   w2 = (s16)w4
>   
>   // CHECK: 3f 31 01 00 00 00 00 00   r1 s/= r3
>   // CHECK: 9f 42 01 00 00 00 00 00   r2 s%= r4
>   r1 s/= r3
>   r2 s%= r4
>   
>   // CHECK: 3c 31 01 00 00 00 00 00   w1 s/= w3
>   // CHECK: 9c 42 01 00 00 00 00 00   w2 s%= w4
>   w1 s/= w3
>   w2 s%= w4
>
> And it looks like some instructions are not printed correctly:
>
>   $ llvm-mc -triple bpfel --mcpu=v4 --assemble --filetype=obj 
> /home/eddy/work/llvm-project/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
>  | llvm-objdump -d --mattr=+alu32 -
>   
>   :file format elf64-bpf
>   
>   Disassembly of section .text:
>   
>    <.text>:
>  0:   d7 01 00 00 10 00 00 00 r1 = bswap16 r1
>  1:   d7 02 00 00 20 00 00 00 r2 = bswap32 r2
>  2:   d7 03 00 00 40 00 00 00 r3 = bswap64 r3
>  3:   91 41 00 00 00 00 00 00 w1 = *(s8 *)(r4 + 0x0)
>  4:   89 52 04 00 00 00 00 00 w2 = *(s16 *)(r5 + 0x4)
>  5:   81 63 08 00 00 00 00 00 
>  6:   91 41 00 00 00 00 00 00 w1 = *(s8 *)(r4 + 0x0)
>  7:   89 52 04 00 00 00 00 00 w2 = *(s16 *)(r5 + 0x4)
>  8:   bf 41 08 00 00 00 00 00 r1 = (s8)r4
>  9:   bf 52 10 00 00 00 00 00 r2 = (s16)r5
> 10:   bf 63 20 00 00 00 00 00 r3 = (s32)w6
> 11:   bc 31 08 00 00 00 00 00 w1 = (s8)w3
> 12:   bc 42 10 00 00 00 00 00 w2 = (s16)w4
> 13:   3f 31 01 00 00 00 00 00 r1 s/= r3
> 14:   9f 42 01 00 00 00 00 00 r2 s%= r4
> 15:   3c 31 01 00 00 00 00 00 w1 s/= w3
> 16:   9c 42 01 00 00 00 00 00 w2 s%= w4
>
> I'm not sure if this is an issue with disassembler or some additional 
> `--mattr` options are needed.

There is a problem in the td file for the 32-bit signed load. The current definition is 
not quite right, since the load is supposed to sign-extend all the way to 64 bits. I 
will fix it in the next revision.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-20 Thread Eduard Zingerman via Phabricator via cfe-commits
eddyz87 added a comment.

I tried adding a test similar to `assemble-disassemble.ll`:

  // RUN: llvm-mc -triple bpfel --mcpu=v4 --assemble --filetype=obj %s \
  // RUN:   | llvm-objdump -d --mattr=+alu32 - \
  // RUN:   | FileCheck %s
  
  // CHECK: d7 01 00 00 10 00 00 00 r1 = bswap16 r1
  // CHECK: d7 02 00 00 20 00 00 00 r2 = bswap32 r2
  // CHECK: d7 03 00 00 40 00 00 00 r3 = bswap64 r3
  r1 = bswap16 r1
  r2 = bswap32 r2
  r3 = bswap64 r3
  
  // CHECK: 91 41 00 00 00 00 00 00 r1 = *(s8 *)(r4 + 0x0)
  // CHECK: 89 52 04 00 00 00 00 00 r2 = *(s16 *)(r5 + 0x4)
  // CHECK: 81 63 08 00 00 00 00 00 r3 = *(s32 *)(r6 + 0x8)
  r1 = *(s8 *)(r4 + 0)
  r2 = *(s16 *)(r5 + 4)
  r3 = *(s32 *)(r6 + 8)
  
  // CHECK: 91 41 00 00 00 00 00 00 w1 = *(s8 *)(r4 + 0x0)
  // CHECK: 89 52 04 00 00 00 00 00 w2 = *(s16 *)(r5 + 0x4)
  w1 = *(s8 *)(r4 + 0)
  w2 = *(s16 *)(r5 + 4)
  
  // CHECK: bf 41 08 00 00 00 00 00 r1 = (s8)r4
  // CHECK: bf 52 10 00 00 00 00 00 r2 = (s16)r5
  // CHECK: bf 63 20 00 00 00 00 00 r3 = (s32)w6
  r1 = (s8)r4
  r2 = (s16)r5
  r3 = (s32)w6
  // Should this work as well: r3 = (s32)r6 ?
  
  // CHECK: bc 31 08 00 00 00 00 00 w1 = (s8)w3
  // CHECK: bc 42 10 00 00 00 00 00 w2 = (s16)w4
  w1 = (s8)w3
  w2 = (s16)w4

And it looks like some instructions are not printed correctly:

  $ llvm-mc -triple bpfel --mcpu=v4 --assemble --filetype=obj 
/home/eddy/work/llvm-project/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s 
| llvm-objdump -d --mattr=+alu32 -
  
  :  file format elf64-bpf
  
  Disassembly of section .text:
  
   <.text>:
 0: d7 01 00 00 10 00 00 00 r1 = bswap16 r1
 1: d7 02 00 00 20 00 00 00 r2 = bswap32 r2
 2: d7 03 00 00 40 00 00 00 r3 = bswap64 r3
 3: 91 41 00 00 00 00 00 00 w1 = *(s8 *)(r4 + 0x0)
 4: 89 52 04 00 00 00 00 00 w2 = *(s16 *)(r5 + 0x4)
 5: 81 63 08 00 00 00 00 00 
 6: 91 41 00 00 00 00 00 00 w1 = *(s8 *)(r4 + 0x0)
 7: 89 52 04 00 00 00 00 00 w2 = *(s16 *)(r5 + 0x4)
 8: bf 41 08 00 00 00 00 00 r1 = (s8)r4
 9: bf 52 10 00 00 00 00 00 r2 = (s16)r5
10: bf 63 20 00 00 00 00 00 r3 = (s32)w6
11: bc 31 08 00 00 00 00 00 w1 = (s8)w3
12: bc 42 10 00 00 00 00 00 w2 = (s16)w4

I'm not sure if this is an issue with disassembler or some additional `--mattr` 
options are needed.




Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:379
+  "$dst = (s8)$src",
+  [(set GPR:$dst, (sra (shl GPR:$src, (i64 56)), (i64 
56)))]>;
+  def MOVSX_rr_16 : ALU_RR;
+  [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>;
   def MOVSX_rr_16 : ALU_RR;
+  [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>;
   def MOVSX_rr_32 : ALU_RR;
+  [(set GPR32:$dst, (sext_inreg GPR32:$src, i8))]>;
   def MOVSX_rr_32_16 : ALU_RR;
+  [(set GPR32:$dst, (sext_inreg GPR32:$src, i16))]>;
 }
 }
 ```



Comment at: llvm/lib/Target/BPF/BPFMIPeephole.cpp:321
+
+  std::map ReverseCondOpMap;
 

Is this map unused?



Comment at: llvm/lib/Target/BPF/BPFMIPeephole.cpp:412
+  int CurrNumInsns = 0;
+  std::map SoFarNumInsns;
+  std::map FollowThroughBB;

Nitpick: Fangrui suggested in my llvm-objdump revisions to use `DenseMap` in 
most cases (as `std::map` allocates for each pair).



Comment at: llvm/test/CodeGen/BPF/movsx.ll:30
+}
+; CHECK: w0 = w1 # encoding: 
[0xbc,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+

This does not seem right, as it does not sign extend 8-bit argument to 16-bit 
value.



Comment at: llvm/test/CodeGen/BPF/movsx.ll:38
+}
+; CHECK: w0 = w1 # encoding: 
[0xbc,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+

Shouldn't this be `w0 = (s8)w1`?
A few checks below also look strange.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D144829: [BPF] Add a few new insns under cpu=v4

2023-07-19 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast accepted this revision.
ast added a comment.

lgtm. @eddyz87 pls take a look


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144829/new/

https://reviews.llvm.org/D144829

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits