[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM

2019-11-16 Thread Anna Welker via Phabricator via cfe-commits
anwel updated this revision to Diff 229531.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68862/new/

https://reviews.llvm.org/D68862

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/Driver/ToolChains/Arch/ARM.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Driver/arm-reserved-reg-options.c
  clang/test/Sema/arm-global-regs.c
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMAsmPrinter.cpp
  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
  llvm/lib/Target/ARM/ARMFrameLowering.cpp
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/ARMTargetTransformInfo.h
  llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll
  llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
  llvm/test/CodeGen/Thumb/callee_save_reserved.ll
  llvm/test/Feature/reserve_global_reg.ll

Index: llvm/test/Feature/reserve_global_reg.ll
===
--- /dev/null
+++ llvm/test/Feature/reserve_global_reg.ll
@@ -0,0 +1,29 @@
+; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s
+; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s
+; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s
+
+; int test(int a, int b, int c) {
+;   return a + b + c;
+; }
+
+; Function Attrs: noinline nounwind optnone
+define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, i32* %c.addr, align 4
+  %add1 = add nsw i32 %add, %2
+  ret i32 %add1
+}
+
+; CHECK-RESERVE-FP7: Register r7 has been specified but is used as the frame pointer for this target.
+; CHECK-RESERVE-FP11: Register r11 has been specified but is used as the frame pointer for this target.
+; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as the frame pointer for this target.
+
Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \
+; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s
+
+; Reserved low registers should not be used to correct reg deficit.
+define <4 x i32> @four_high_four_return_reserved() {
+entry:
+  ; CHECK-INVALID-NOT: r{{6|8}}
+  tail call void asm sideeffect "", "~{r8},~{r9}"()
+  %vecinit = insertelement <4 x i32> undef, i32 1, i32 0
+  %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
+  %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
+  %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
+  ret <4 x i32> %vecinit13
+}
+
Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi  -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;  unsigned int j,
+;  unsigned int k,
+;  unsigned int l,
+;  unsigned int m,
+;  unsigned int n,
+;  unsigned int o,
+;  unsigned int p)
+; {
+; unsigned int result = i + j + k + l + m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK: push {{{.*}}r4, r5{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  store i32 %n, i32* %n.addr, align 4
+  store i32 

[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM

2019-11-14 Thread Anna Welker via Phabricator via cfe-commits
anwel updated this revision to Diff 229296.
anwel added a comment.

Change clang's error message when trying to use the target's frame pointer as 
GRV to sound more like an error then a warning.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68862/new/

https://reviews.llvm.org/D68862

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/Driver/ToolChains/Arch/ARM.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Driver/arm-reserved-reg-options.c
  clang/test/Sema/arm-global-regs.c
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMAsmPrinter.cpp
  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
  llvm/lib/Target/ARM/ARMFrameLowering.cpp
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/ARMTargetTransformInfo.h
  llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll
  llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
  llvm/test/CodeGen/Thumb/callee_save_reserved.ll
  llvm/test/Feature/reserve_global_reg.ll

Index: llvm/test/Feature/reserve_global_reg.ll
===
--- /dev/null
+++ llvm/test/Feature/reserve_global_reg.ll
@@ -0,0 +1,29 @@
+; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s
+; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s
+; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s
+
+; int test(int a, int b, int c) {
+;   return a + b + c;
+; }
+
+; Function Attrs: noinline nounwind optnone
+define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, i32* %c.addr, align 4
+  %add1 = add nsw i32 %add, %2
+  ret i32 %add1
+}
+
+; CHECK-RESERVE-FP7: Register r7 has been specified but is used as the frame pointer on this target.
+; CHECK-RESERVE-FP11: Register r11 has been specified but is used as the frame pointer on this target.
+; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as the frame pointer on this target.
+
Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \
+; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s
+
+; Reserved low registers should not be used to correct reg deficit.
+define <4 x i32> @four_high_four_return_reserved() {
+entry:
+  ; CHECK-INVALID-NOT: r{{6|8}}
+  tail call void asm sideeffect "", "~{r8},~{r9}"()
+  %vecinit = insertelement <4 x i32> undef, i32 1, i32 0
+  %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
+  %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
+  %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
+  ret <4 x i32> %vecinit13
+}
+
Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi  -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;  unsigned int j,
+;  unsigned int k,
+;  unsigned int l,
+;  unsigned int m,
+;  unsigned int n,
+;  unsigned int o,
+;  unsigned int p)
+; {
+; unsigned int result = i + j + k + l + m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK: push {{{.*}}r4, r5{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* 

[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM

2019-11-14 Thread Anna Welker via Phabricator via cfe-commits
anwel updated this revision to Diff 229269.
anwel added a comment.

Rebase on current llvm-project master


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68862/new/

https://reviews.llvm.org/D68862

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/Driver/ToolChains/Arch/ARM.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Driver/arm-reserved-reg-options.c
  clang/test/Sema/arm-global-regs.c
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMAsmPrinter.cpp
  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
  llvm/lib/Target/ARM/ARMFrameLowering.cpp
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/ARMTargetTransformInfo.h
  llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll
  llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
  llvm/test/CodeGen/Thumb/callee_save_reserved.ll
  llvm/test/Feature/reserve_global_reg.ll

Index: llvm/test/Feature/reserve_global_reg.ll
===
--- /dev/null
+++ llvm/test/Feature/reserve_global_reg.ll
@@ -0,0 +1,29 @@
+; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s
+; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s
+; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s
+
+; int test(int a, int b, int c) {
+;   return a + b + c;
+; }
+
+; Function Attrs: noinline nounwind optnone
+define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, i32* %c.addr, align 4
+  %add1 = add nsw i32 %add, %2
+  ret i32 %add1
+}
+
+; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target.
+
Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \
+; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s
+
+; Reserved low registers should not be used to correct reg deficit.
+define <4 x i32> @four_high_four_return_reserved() {
+entry:
+  ; CHECK-INVALID-NOT: r{{6|8}}
+  tail call void asm sideeffect "", "~{r8},~{r9}"()
+  %vecinit = insertelement <4 x i32> undef, i32 1, i32 0
+  %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
+  %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
+  %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
+  ret <4 x i32> %vecinit13
+}
+
Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi  -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;  unsigned int j,
+;  unsigned int k,
+;  unsigned int l,
+;  unsigned int m,
+;  unsigned int n,
+;  unsigned int o,
+;  unsigned int p)
+; {
+; unsigned int result = i + j + k + l + m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK: push {{{.*}}r4, r5{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4

[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM

2019-10-30 Thread Anna Welker via Phabricator via cfe-commits
anwel added inline comments.



Comment at: clang/lib/Basic/Targets/ARM.cpp:903-907
+  for (std::string  : Features) {
+if (Feature.compare(SearchFeature) == 0)
+  return true;
+  }
+  return false;

chill wrote:
> This explicit loop can be written like:
> ```
> return llvm::any_of(getTargetOpts().Features(),
>[&](auto ) { return P == SearchFeature; });
> ```
> 
I see your point, but using a for loop seems to better match the style of the 
code.



Comment at: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll:3
+;
+; Equivalent C source code
+; void bar(unsigned int i,

SjoerdMeijer wrote:
> As all these tests (this file and the ones above) are the same, the 
> "equivalent C source code" is the same, perhaps move all these cases into 1 
> file.
I see your point, but I'd still prefer to leave them as they are because in my 
opinion having the test cases separated into individual files it's much easier 
to grasp what they are doing.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68862/new/

https://reviews.llvm.org/D68862



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM

2019-10-30 Thread Anna Welker via Phabricator via cfe-commits
anwel updated this revision to Diff 227094.
anwel marked 9 inline comments as done.
anwel added a comment.

Rebase and make some variables const


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68862/new/

https://reviews.llvm.org/D68862

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/Driver/ToolChains/Arch/ARM.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Driver/arm-reserved-reg-options.c
  clang/test/Sema/arm-global-regs.c
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMAsmPrinter.cpp
  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
  llvm/lib/Target/ARM/ARMFrameLowering.cpp
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/ARMTargetTransformInfo.h
  llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll
  llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
  llvm/test/CodeGen/Thumb/callee_save_reserved.ll
  llvm/test/Feature/reserve_global_reg.ll

Index: llvm/test/Feature/reserve_global_reg.ll
===
--- /dev/null
+++ llvm/test/Feature/reserve_global_reg.ll
@@ -0,0 +1,29 @@
+; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s
+; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s
+; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s
+
+; int test(int a, int b, int c) {
+;   return a + b + c;
+; }
+
+; Function Attrs: noinline nounwind optnone
+define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, i32* %c.addr, align 4
+  %add1 = add nsw i32 %add, %2
+  ret i32 %add1
+}
+
+; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target.
+
Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \
+; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s
+
+; Reserved low registers should not be used to correct reg deficit.
+define <4 x i32> @four_high_four_return_reserved() {
+entry:
+  ; CHECK-INVALID-NOT: r{{6|8}}
+  tail call void asm sideeffect "", "~{r8},~{r9}"()
+  %vecinit = insertelement <4 x i32> undef, i32 1, i32 0
+  %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
+  %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
+  %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
+  ret <4 x i32> %vecinit13
+}
+
Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi  -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;  unsigned int j,
+;  unsigned int k,
+;  unsigned int l,
+;  unsigned int m,
+;  unsigned int n,
+;  unsigned int o,
+;  unsigned int p)
+; {
+; unsigned int result = i + j + k + l + m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK: push {{{.*}}r4, r5{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4

[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM

2019-10-14 Thread Anna Welker via Phabricator via cfe-commits
anwel updated this revision to Diff 224862.
anwel added a comment.

Applied some minor changes suggested in the comments, including renaming the 
array of reserved registers.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68862/new/

https://reviews.llvm.org/D68862

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/Driver/ToolChains/Arch/ARM.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Driver/arm-reserved-reg-options.c
  clang/test/Sema/arm-global-regs.c
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMAsmPrinter.cpp
  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
  llvm/lib/Target/ARM/ARMFrameLowering.cpp
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/ARMTargetTransformInfo.h
  llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll
  llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
  llvm/test/CodeGen/Thumb/callee_save_reserved.ll
  llvm/test/Feature/reserve_global_reg.ll

Index: llvm/test/Feature/reserve_global_reg.ll
===
--- /dev/null
+++ llvm/test/Feature/reserve_global_reg.ll
@@ -0,0 +1,29 @@
+; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s
+; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s
+; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s
+
+; int test(int a, int b, int c) {
+;   return a + b + c;
+; }
+
+; Function Attrs: noinline nounwind optnone
+define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, i32* %c.addr, align 4
+  %add1 = add nsw i32 %add, %2
+  ret i32 %add1
+}
+
+; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target.
+
Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \
+; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s
+
+; Reserved low registers should not be used to correct reg deficit.
+define <4 x i32> @four_high_four_return_reserved() {
+entry:
+  ; CHECK-INVALID-NOT: r{{6|8}}
+  tail call void asm sideeffect "", "~{r8},~{r9}"()
+  %vecinit = insertelement <4 x i32> undef, i32 1, i32 0
+  %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
+  %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2
+  %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3
+  ret <4 x i32> %vecinit13
+}
+
Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi  -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;  unsigned int j,
+;  unsigned int k,
+;  unsigned int l,
+;  unsigned int m,
+;  unsigned int n,
+;  unsigned int o,
+;  unsigned int p)
+; {
+; unsigned int result = i + j + k + l + m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK: push {{{.*}}r4, r5{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store 

[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM

2019-10-11 Thread Anna Welker via Phabricator via cfe-commits
anwel created this revision.
anwel added reviewers: carwil, amilendra_arm, phosek, michaelplatings, efriedma.
anwel added projects: LLVM, clang.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls.

This patch combines two earlier patches aiming at providing the same support 
(https://reviews.llvm.org/D56003 for clang, https://reviews.llvm.org/D56005 for 
LLVM). It enables reservation of allocatable registers via command line 
options, which in turn allows them to be used as global named register 
variables. They will then not be used by the register allocator nor spilled to 
the stack.

More information is available in the original RFC: 
http://lists.llvm.org/pipermail/llvm-dev/2018-December/128706.html

Changes from the previous patches include:

- adding a constraint to specify -ffixed-rN if rN is used as named register 
variable.
- upgrading the frame-pointer warning to an error and throwing an error in 
LLVM, as well as clang.*

Additionally this patch now only supports r6-r11. r4 and r5 are excluded from 
this patch as r4 is used as hard-coded scratch register in various parts of the 
ARM backend. r4 also appears to be used as an input register for a Windows asm 
routine (__chkstk). Similarly, the ABI of the segmented stack prologue for 
Android and Linux seems to use r4 and r5 as input registers. A separate patch 
could follow to add the support for r4 and/or r5, such that the whole range of 
allocatable registers (r4-r11) is available.

As before it should be noted that this also changes the behaviour of the old 
-ffixed-r9 option. This option will now prevent the register from being spilled 
to the stack.

*This was originally a warning, but we don't seem to have the necessary 
information to determine frame-pointer usage in the given context. Any insight 
here would be welcome.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68862

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/Driver/ToolChains/Arch/ARM.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Driver/arm-reserved-reg-options.c
  clang/test/Sema/arm-global-regs.c
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMAsmPrinter.cpp
  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
  llvm/lib/Target/ARM/ARMFrameLowering.cpp
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/ARMTargetTransformInfo.h
  llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll
  llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll
  llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll
  llvm/test/CodeGen/Thumb/callee_save_reserved.ll
  llvm/test/Feature/reserve_global_reg.ll

Index: llvm/test/Feature/reserve_global_reg.ll
===
--- /dev/null
+++ llvm/test/Feature/reserve_global_reg.ll
@@ -0,0 +1,29 @@
+; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s
+; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s
+; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s
+
+; int test(int a, int b, int c) {
+;   return a+b+c;
+; }
+
+; Function Attrs: noinline nounwind optnone
+define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, i32* %c.addr, align 4
+  %add1 = add nsw i32 %add, %2
+  ret i32 %add1
+}
+
+; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target.
+; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target.
+
Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \
+; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s
+
+; Reserved low registers should not be used to correct reg