[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM
anwel updated this revision to Diff 229531. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D68862/new/ https://reviews.llvm.org/D68862 Files: clang/docs/ClangCommandLineReference.rst clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/TargetInfo.h clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/ARM.h clang/lib/Driver/ToolChains/Arch/ARM.cpp clang/lib/Sema/SemaDecl.cpp clang/test/Driver/arm-reserved-reg-options.c clang/test/Sema/arm-global-regs.c llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMAsmPrinter.cpp llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/lib/Target/ARM/ARMFrameLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/lib/Target/ARM/ARMTargetTransformInfo.h llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll llvm/test/CodeGen/Thumb/callee_save_reserved.ll llvm/test/Feature/reserve_global_reg.ll Index: llvm/test/Feature/reserve_global_reg.ll === --- /dev/null +++ llvm/test/Feature/reserve_global_reg.ll @@ -0,0 +1,29 @@ +; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s +; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s +; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s + +; int test(int a, int b, int c) { +; return a + b + c; +; } + +; Function Attrs: noinline nounwind optnone +define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca 
i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* %c.addr, align 4 + %add1 = add nsw i32 %add, %2 + ret i32 %add1 +} + +; CHECK-RESERVE-FP7: Register r7 has been specified but is used as the frame pointer for this target. +; CHECK-RESERVE-FP11: Register r11 has been specified but is used as the frame pointer for this target. +; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as the frame pointer for this target. + Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ +; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s + +; Reserved low registers should not be used to correct reg deficit. 
+define <4 x i32> @four_high_four_return_reserved() { +entry: + ; CHECK-INVALID-NOT: r{{6|8}} + tail call void asm sideeffect "", "~{r8},~{r9}"() + %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 + %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 + %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 + %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 + ret <4 x i32> %vecinit13 +} + Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll === --- /dev/null +++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l + m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK: push {{{.*}}r4, r5{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4 + store i32 %m, i32* %m.addr, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32
[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM
anwel updated this revision to Diff 229296. anwel added a comment. Change clang's error message when trying to use the target's frame pointer as GRV to sound more like an error than a warning. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D68862/new/ https://reviews.llvm.org/D68862 Files: clang/docs/ClangCommandLineReference.rst clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/TargetInfo.h clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/ARM.h clang/lib/Driver/ToolChains/Arch/ARM.cpp clang/lib/Sema/SemaDecl.cpp clang/test/Driver/arm-reserved-reg-options.c clang/test/Sema/arm-global-regs.c llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMAsmPrinter.cpp llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/lib/Target/ARM/ARMFrameLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/lib/Target/ARM/ARMTargetTransformInfo.h llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll llvm/test/CodeGen/Thumb/callee_save_reserved.ll llvm/test/Feature/reserve_global_reg.ll Index: llvm/test/Feature/reserve_global_reg.ll === --- /dev/null +++ llvm/test/Feature/reserve_global_reg.ll @@ -0,0 +1,29 @@ +; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s +; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s +; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s + +; int test(int a, int b, int c) { +; return a + b + c; +; } + +; Function Attrs: noinline nounwind optnone 
+define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* %c.addr, align 4 + %add1 = add nsw i32 %add, %2 + ret i32 %add1 +} + +; CHECK-RESERVE-FP7: Register r7 has been specified but is used as the frame pointer on this target. +; CHECK-RESERVE-FP11: Register r11 has been specified but is used as the frame pointer on this target. +; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as the frame pointer on this target. + Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ +; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s + +; Reserved low registers should not be used to correct reg deficit. 
+define <4 x i32> @four_high_four_return_reserved() { +entry: + ; CHECK-INVALID-NOT: r{{6|8}} + tail call void asm sideeffect "", "~{r8},~{r9}"() + %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 + %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 + %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 + %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 + ret <4 x i32> %vecinit13 +} + Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll === --- /dev/null +++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l + m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK: push {{{.*}}r4, r5{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32*
[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM
anwel updated this revision to Diff 229269. anwel added a comment. Rebase on current llvm-project master CHANGES SINCE LAST ACTION https://reviews.llvm.org/D68862/new/ https://reviews.llvm.org/D68862 Files: clang/docs/ClangCommandLineReference.rst clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/TargetInfo.h clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/ARM.h clang/lib/Driver/ToolChains/Arch/ARM.cpp clang/lib/Sema/SemaDecl.cpp clang/test/Driver/arm-reserved-reg-options.c clang/test/Sema/arm-global-regs.c llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMAsmPrinter.cpp llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/lib/Target/ARM/ARMFrameLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/lib/Target/ARM/ARMTargetTransformInfo.h llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll llvm/test/CodeGen/Thumb/callee_save_reserved.ll llvm/test/Feature/reserve_global_reg.ll Index: llvm/test/Feature/reserve_global_reg.ll === --- /dev/null +++ llvm/test/Feature/reserve_global_reg.ll @@ -0,0 +1,29 @@ +; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s +; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s +; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s + +; int test(int a, int b, int c) { +; return a + b + c; +; } + +; Function Attrs: noinline nounwind optnone +define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 { +entry: + %a.addr = alloca i32, 
align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* %c.addr, align 4 + %add1 = add nsw i32 %add, %2 + ret i32 %add1 +} + +; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target. + Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ +; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s + +; Reserved low registers should not be used to correct reg deficit. 
+define <4 x i32> @four_high_four_return_reserved() { +entry: + ; CHECK-INVALID-NOT: r{{6|8}} + tail call void asm sideeffect "", "~{r8},~{r9}"() + %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 + %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 + %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 + %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 + ret <4 x i32> %vecinit13 +} + Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll === --- /dev/null +++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l + m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK: push {{{.*}}r4, r5{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4 + store i32 %m, i32* %m.addr, align 4
[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM
anwel added inline comments. Comment at: clang/lib/Basic/Targets/ARM.cpp:903-907 + for (std::string &Feature : Features) { +if (Feature.compare(SearchFeature) == 0) + return true; + } + return false; chill wrote: > This explicit loop can be written like: > ``` > return llvm::any_of(getTargetOpts().Features(), >[&](auto &P) { return P == SearchFeature; }); > ``` > I see your point, but using a for loop seems to better match the style of the code. Comment at: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll:3 +; +; Equivalent C source code +; void bar(unsigned int i, SjoerdMeijer wrote: > As all these tests (this file and the ones above) are the same, the > "equivalent C source code" is the same, perhaps move all these cases into 1 > file. I see your point, but I'd still prefer to leave them as they are because, in my opinion, with the test cases separated into individual files it's much easier to grasp what they are doing. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D68862/new/ https://reviews.llvm.org/D68862 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM
anwel updated this revision to Diff 227094. anwel marked 9 inline comments as done. anwel added a comment. Rebase and make some variables const CHANGES SINCE LAST ACTION https://reviews.llvm.org/D68862/new/ https://reviews.llvm.org/D68862 Files: clang/docs/ClangCommandLineReference.rst clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/TargetInfo.h clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/ARM.h clang/lib/Driver/ToolChains/Arch/ARM.cpp clang/lib/Sema/SemaDecl.cpp clang/test/Driver/arm-reserved-reg-options.c clang/test/Sema/arm-global-regs.c llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMAsmPrinter.cpp llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/lib/Target/ARM/ARMFrameLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/lib/Target/ARM/ARMTargetTransformInfo.h llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll llvm/test/CodeGen/Thumb/callee_save_reserved.ll llvm/test/Feature/reserve_global_reg.ll Index: llvm/test/Feature/reserve_global_reg.ll === --- /dev/null +++ llvm/test/Feature/reserve_global_reg.ll @@ -0,0 +1,29 @@ +; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s +; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s +; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s + +; int test(int a, int b, int c) { +; return a + b + c; +; } + +; Function Attrs: noinline nounwind optnone +define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) 
#0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* %c.addr, align 4 + %add1 = add nsw i32 %add, %2 + ret i32 %add1 +} + +; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target. + Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ +; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s + +; Reserved low registers should not be used to correct reg deficit. 
+define <4 x i32> @four_high_four_return_reserved() { +entry: + ; CHECK-INVALID-NOT: r{{6|8}} + tail call void asm sideeffect "", "~{r8},~{r9}"() + %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 + %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 + %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 + %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 + ret <4 x i32> %vecinit13 +} + Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll === --- /dev/null +++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l + m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK: push {{{.*}}r4, r5{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4
[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM
anwel updated this revision to Diff 224862. anwel added a comment. Applied some minor changes suggested in the comments, including renaming the array of reserved registers. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D68862/new/ https://reviews.llvm.org/D68862 Files: clang/docs/ClangCommandLineReference.rst clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/TargetInfo.h clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/ARM.h clang/lib/Driver/ToolChains/Arch/ARM.cpp clang/lib/Sema/SemaDecl.cpp clang/test/Driver/arm-reserved-reg-options.c clang/test/Sema/arm-global-regs.c llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMAsmPrinter.cpp llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/lib/Target/ARM/ARMFrameLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/lib/Target/ARM/ARMTargetTransformInfo.h llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll llvm/test/CodeGen/Thumb/callee_save_reserved.ll llvm/test/Feature/reserve_global_reg.ll Index: llvm/test/Feature/reserve_global_reg.ll === --- /dev/null +++ llvm/test/Feature/reserve_global_reg.ll @@ -0,0 +1,29 @@ +; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s +; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s +; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s + +; int test(int a, int b, int c) { +; return a + b + c; +; } + +; Function Attrs: noinline nounwind optnone +define hidden i32 
@_Z4testiii(i32 %a, i32 %b, i32 %c) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* %c.addr, align 4 + %add1 = add nsw i32 %add, %2 + ret i32 %add1 +} + +; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target. + Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ +; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s + +; Reserved low registers should not be used to correct reg deficit. 
+define <4 x i32> @four_high_four_return_reserved() { +entry: + ; CHECK-INVALID-NOT: r{{6|8}} + tail call void asm sideeffect "", "~{r8},~{r9}"() + %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 + %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 + %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 + %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 + ret <4 x i32> %vecinit13 +} + Index: llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll === --- /dev/null +++ llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l + m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK: push {{{.*}}r4, r5{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store
[PATCH] D68862: [ARM] Allocatable Global Register Variables for ARM
anwel created this revision. anwel added reviewers: carwil, amilendra_arm, phosek, michaelplatings, efriedma. anwel added projects: LLVM, clang. Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls. This patch combines two earlier patches aiming at providing the same support (https://reviews.llvm.org/D56003 for clang, https://reviews.llvm.org/D56005 for LLVM). It enables reservation of allocatable registers via command line options, which in turn allows them to be used as global named register variables. They will then not be used by the register allocator nor spilled to the stack. More information is available in the original RFC: http://lists.llvm.org/pipermail/llvm-dev/2018-December/128706.html Changes from the previous patches include: - adding a constraint to specify -ffixed-rN if rN is used as a named register variable. - upgrading the frame-pointer warning to an error and throwing an error in LLVM, as well as clang.* Additionally this patch now only supports r6-r11. r4 and r5 are excluded from this patch as r4 is used as a hard-coded scratch register in various parts of the ARM backend. r4 also appears to be used as an input register for a Windows asm routine (__chkstk). Similarly, the ABI of the segmented stack prologue for Android and Linux seems to use r4 and r5 as input registers. A separate patch could follow to add the support for r4 and/or r5, such that the whole range of allocatable registers (r4-r11) is available. As before it should be noted that this also changes the behaviour of the old -ffixed-r9 option. This option will now prevent the register from being spilled to the stack. *This was originally a warning, but we don't seem to have the necessary information to determine frame-pointer usage in the given context. Any insight here would be welcome. 
Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D68862 Files: clang/docs/ClangCommandLineReference.rst clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/TargetInfo.h clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/ARM.h clang/lib/Driver/ToolChains/Arch/ARM.cpp clang/lib/Sema/SemaDecl.cpp clang/test/Driver/arm-reserved-reg-options.c clang/test/Sema/arm-global-regs.c llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMAsmPrinter.cpp llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/lib/Target/ARM/ARMFrameLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/lib/Target/ARM/ARMTargetTransformInfo.h llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll llvm/test/CodeGen/Thumb/callee_save_reserved.ll llvm/test/Feature/reserve_global_reg.ll Index: llvm/test/Feature/reserve_global_reg.ll === --- /dev/null +++ llvm/test/Feature/reserve_global_reg.ll @@ -0,0 +1,29 @@ +; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s +; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s +; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s + +; int test(int a, int b, int c) { +; return a+b+c; +; } + +; Function Attrs: noinline nounwind optnone +define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* 
%b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* %c.addr, align 4 + %add1 = add nsw i32 %add, %2 + ret i32 %add1 +} + +; CHECK-RESERVE-FP7: Register r7 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11: Register r11 has been specified but is used as a frame pointer on this target. +; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as a frame pointer on this target. + Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ +; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s + +; Reserved low registers should not be used to correct reg