kzhuravl-AMD created this revision.
kzhuravl-AMD added reviewers: arsenm, tstellarAMD.
kzhuravl-AMD added a subscriber: cfe-commits.

Add the following kernel function attributes for AMD GPU Tools (debugger, profiler):
  - amdgpu_tools_insert_nops - insert two nop instructions for each high-level
source statement
  - amdgpu_tools_num_reserved_vgpr(<num>) - reserve <num> vector registers and
do not use them throughout kernel execution
  - amdgpu_tools_num_reserved_sgpr(<num>) - reserve <num> scalar registers and
do not use them throughout kernel execution

Also add similar command-line options that insert these attributes for every
kernel; when both an option and an attribute are given, the option takes precedence

+ Updated docs, added tests

http://reviews.llvm.org/D17764

Files:
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Driver/Options.td
  include/clang/Frontend/CodeGenOptions.def
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/TargetInfo.cpp
  lib/Driver/Tools.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl
  test/CodeGenOpenCL/amdgpu-tools-attrs.cl
  test/CodeGenOpenCL/amdgpu-tools-opts.cl
  test/SemaOpenCL/amdgpu-tools-attrs.cl

Index: test/SemaOpenCL/amdgpu-tools-attrs.cl
===================================================================
--- /dev/null
+++ test/SemaOpenCL/amdgpu-tools-attrs.cl
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -fsyntax-only %s
+
+// Check attributes with non kernel function
+typedef __attribute__((amdgpu_tools_insert_nops)) struct foo0_s { // expected-error {{'amdgpu_tools_insert_nops' attribute only applies to kernel functions}}
+  int x;
+  int y;
+} foo0_t;
+typedef __attribute__((amdgpu_tools_num_reserved_vgpr(8))) struct foo1_s { // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute only applies to kernel functions}}
+  int x;
+  int y;
+} foo1_t;
+typedef __attribute__((amdgpu_tools_num_reserved_sgpr(4))) struct foo2_s { // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute only applies to kernel functions}}
+  int x;
+  int y;
+} foo2_t;
+
+// Check non integer attribute values.
+__attribute__((amdgpu_tools_num_reserved_vgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute requires an integer constant}}
+__attribute__((amdgpu_tools_num_reserved_sgpr("DEF"))) kernel void foo4() {} // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute requires an integer constant}}
+
+// Check large attribute values.
+__attribute__((amdgpu_tools_num_reserved_vgpr(4294967296))) kernel void foo5() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_tools_num_reserved_sgpr(4294967296))) kernel void foo6() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_tools_num_reserved_vgpr(4294967296), amdgpu_tools_num_reserved_sgpr(4294967296))) kernel void foo7() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+
+// Check valid attributes.
+__attribute__((amdgpu_tools_insert_nops)) kernel void foo8() {}
+__attribute__((amdgpu_tools_num_reserved_vgpr(8))) kernel void foo9() {}
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) kernel void foo10() {}
+__attribute__((amdgpu_tools_num_reserved_sgpr(4))) kernel void foo11() {}
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) kernel void foo12() {}
+__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) kernel void foo13() {}
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) kernel void foo14() {}
+// Make sure 0 VGPRs is accepted.
+__attribute__((amdgpu_tools_num_reserved_vgpr(0))) kernel void foo15() {}
+// Make sure 0 SGPRs is accepted.
+__attribute__((amdgpu_tools_num_reserved_sgpr(0))) kernel void foo16() {}
+// Make sure 0 VGPRs and 0 SGPRs is accepted.
+__attribute__((amdgpu_tools_num_reserved_vgpr(0), amdgpu_tools_num_reserved_sgpr(0))) kernel void foo17() {}
+// Make sure kernel keyword can go before __attribute__ keyword.
+kernel __attribute__((amdgpu_tools_insert_nops)) void foo18() {}
+kernel __attribute__((amdgpu_tools_num_reserved_vgpr(8))) void foo19() {}
+kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) void foo20() {}
+kernel __attribute__((amdgpu_tools_num_reserved_sgpr(4))) void foo21() {}
+kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) void foo22() {}
+kernel __attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) void foo23() {}
+kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) void foo24() {}
Index: test/CodeGenOpenCL/amdgpu-tools-opts.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-tools-opts.cl
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=0 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR0_SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=0 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR0_SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=0 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_SGPR0 %s
+
+kernel void foo() { // expected-no-diagnostics
+// NOP: define void @foo() [[ATTR_NOP:#[0-9]+]]
+// VGPR8: define void @foo() [[ATTR_VGPR8:#[0-9]+]]
+// NOP_VGPR8: define void @foo() [[ATTR_NOP_VGPR8:#[0-9]+]]
+// SGPR4: define void @foo() [[ATTR_SGPR4:#[0-9]+]]
+// NOP_SGPR4: define void @foo() [[ATTR_NOP_SGPR4:#[0-9]+]]
+// VGPR8_SGPR4: define void @foo() [[ATTR_VGPR8_SGPR4:#[0-9]+]]
+// NOP_VGPR8_SGPR4: define void @foo() [[ATTR_NOP_VGPR8_SGPR4:#[0-9]+]]
+}
+
+kernel void another_foo() { // expected-no-diagnostics
+// NOP: define void @another_foo() [[ATTR_SECOND_NOP:#[0-9]+]]
+}
+
+// NOP-DAG: attributes [[ATTR_NOP]] = { nounwind "amdgpu_tools_insert_nops"
+// NOP-DAG: attributes [[ATTR_SECOND_NOP]] = { nounwind "amdgpu_tools_insert_nops"
+// VGPR8-DAG: attributes [[ATTR_VGPR8]] = { nounwind "amdgpu_tools_num_reserved_vgpr"="8"
+// NOP_VGPR8-DAG: attributes [[ATTR_NOP_VGPR8]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="8"
+// SGPR4-DAG: attributes [[ATTR_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4"
+// NOP_SGPR4-DAG: attributes [[ATTR_NOP_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4"
+// VGPR8_SGPR4-DAG: attributes [[ATTR_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// NOP_VGPR8_SGPR4-DAG: attributes [[ATTR_NOP_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// VGPR0-NOT: "amdgpu_tools_num_reserved_vgpr"="0"
+// NOP_VGPR0-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="0"
+// SGPR0-NOT: "amdgpu_tools_num_reserved_sgpr"="0"
+// NOP_SGPR0-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="0"
+// VGPR0_SGPR0-NOT: "amdgpu_tools_num_reserved_vgpr"="0" "amdgpu_tools_num_reserved_sgpr"="0"
+// NOP_VGPR0_SGPR0-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="0" "amdgpu_tools_num_reserved_vgpr"="0"
+// X86_NOP-NOT: "amdgpu_tools_insert_nops"
+// X86_VGPR8-NOT: "amdgpu_tools_num_reserved_vgpr"
+// X86_NOP_VGPR8-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="8"
+// X86_SGPR4-NOT: "amdgpu_tools_num_reserved_sgpr"
+// X86_NOP_SGPR4-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4"
+// X86_VGPR8_SGPR4-NOT: "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// X86_NOP_VGPR8_SGPR4-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// X86_VGPR0-NOT: "amdgpu_tools_num_reserved_vgpr"
+// X86_SGPR0-NOT: "amdgpu_tools_num_reserved_sgpr"
Index: test/CodeGenOpenCL/amdgpu-tools-attrs.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-tools-attrs.cl
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
+
+__attribute__((amdgpu_tools_insert_nops)) // expected-no-diagnostics
+kernel void test_tools_insert_nops() {
+// CHECK: define void @test_tools_insert_nops() [[ATTR_NOP:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr8() {
+// CHECK: define void @test_tools_num_reserved_vgpr8() [[ATTR_VGPR8:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics
+kernel void test_tools_insert_nops_num_reserved_vgpr8() {
+// CHECK: define void @test_tools_insert_nops_num_reserved_vgpr8() [[ATTR_NOP_VGPR8:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_sgpr4() {
+// CHECK: define void @test_tools_num_reserved_sgpr4() [[ATTR_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_insert_nops_num_reserved_sgpr4() {
+// CHECK: define void @test_tools_insert_nops_num_reserved_sgpr4() [[ATTR_NOP_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr8_sgpr4() {
+// CHECK: define void @test_tools_num_reserved_vgpr8_sgpr4() [[ATTR_VGPR8_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_insert_nops_num_reserved_vgpr8_sgpr4() {
+// CHECK: define void @test_tools_insert_nops_num_reserved_vgpr8_sgpr4() [[ATTR_NOP_VGPR8_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(0))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr0() {}
+
+__attribute__((amdgpu_tools_num_reserved_sgpr(0))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_sgpr0() {}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(0), amdgpu_tools_num_reserved_sgpr(0))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr0_sgpr0() {}
+
+// CHECK-DAG: attributes [[ATTR_NOP]] = { nounwind "amdgpu_tools_insert_nops"
+// CHECK-DAG: attributes [[ATTR_VGPR8]] = { nounwind "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-DAG: attributes [[ATTR_NOP_VGPR8]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-DAG: attributes [[ATTR_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4"
+// CHECK-DAG: attributes [[ATTR_NOP_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4"
+// CHECK-DAG: attributes [[ATTR_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-DAG: attributes [[ATTR_NOP_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-NOT: "amdgpu_tools_num_reserved_vgpr"="0"
+// CHECK-NOT: "amdgpu_tools_num_reserved_sgpr"="0"
+// X86-NOT: "amdgpu_tools_insert_nops"
+// X86-NOT: "amdgpu_tools_num_reserved_vgpr"
+// X86-NOT: "amdgpu_tools_num_reserved_sgpr"
Index: test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=4 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC1 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=8 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC2 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=4 --amdgpu-tools-num-reserved-sgpr=8 -triple amdgcn-- -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC3 %s
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics
+kernel void test_prec1() {
+// PREC1: define void @test_prec1() [[ATTR_PREC1:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_prec2() {
+// PREC2: define void @test_prec2() [[ATTR_PREC2:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_prec3() {
+// PREC3: define void @test_prec3() [[ATTR_PREC3:#[0-9]+]]
+}
+
+// PREC1-DAG: attributes [[ATTR_PREC1]] = { nounwind "amdgpu_tools_num_reserved_vgpr"="4"
+// PREC2-DAG: attributes [[ATTR_PREC2]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="8"
+// PREC3-DAG: attributes [[ATTR_PREC3]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="8" "amdgpu_tools_num_reserved_vgpr"="4"
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -4703,30 +4703,18 @@
   }
 }
 
-static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
-  uint32_t NumRegs;
-  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
-  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
-    return;
-
-  D->addAttr(::new (S.Context)
-             AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context,
-                               NumRegs,
-                               Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
-                                    const AttributeList &Attr) {
-  uint32_t NumRegs;
-  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
-  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+template <typename AMDGPUAttrType>
+static void handleAMDGPUUInt32Attr(Sema &S, Decl *D,
+                                   const AttributeList &Attr) {
+  uint32_t UInt32Arg;
+  Expr *UInt32ArgExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, UInt32ArgExpr, UInt32Arg))
     return;
 
   D->addAttr(::new (S.Context)
-             AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context,
-                               NumRegs,
-                               Attr.getAttributeSpellingListIndex()));
+             AMDGPUAttrType(Attr.getLoc(), S.Context,
+                            UInt32Arg,
+                            Attr.getAttributeSpellingListIndex()));
 }
 
 static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
@@ -5133,10 +5121,19 @@
     handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
     break;
   case AttributeList::AT_AMDGPUNumVGPR:
-    handleAMDGPUNumVGPRAttr(S, D, Attr);
+    handleAMDGPUUInt32Attr<AMDGPUNumVGPRAttr>(S, D, Attr);
     break;
   case AttributeList::AT_AMDGPUNumSGPR:
-    handleAMDGPUNumSGPRAttr(S, D, Attr);
+    handleAMDGPUUInt32Attr<AMDGPUNumSGPRAttr>(S, D, Attr);
+    break;
+  case AttributeList::AT_AMDGPUToolsInsertNops:
+    handleSimpleAttribute<AMDGPUToolsInsertNopsAttr>(S, D, Attr);
+    break;
+  case AttributeList::AT_AMDGPUToolsNumReservedVGPR:
+    handleAMDGPUUInt32Attr<AMDGPUToolsNumReservedVGPRAttr>(S, D, Attr);
+    break;
+  case AttributeList::AT_AMDGPUToolsNumReservedSGPR:
+    handleAMDGPUUInt32Attr<AMDGPUToolsNumReservedSGPRAttr>(S, D, Attr);
     break;
   case AttributeList::AT_IBAction:
     handleSimpleAttribute<IBActionAttr>(S, D, Attr);
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -664,6 +664,21 @@
     Opts.StackProbeSize = StackProbeSize;
   }
 
+  // Set up AMDGPU Tools arguments.
+  Opts.AMDGPUToolsInsertNopsOpt = Args.hasArg(OPT_amdgpu_tools_insert_nops);
+  if (Arg *A = Args.getLastArg(OPT_amdgpu_tools_num_reserved_vgpr)) {
+    StringRef Val = A->getValue();
+    unsigned AMDGPUToolsNumReservedVGPROpt = Opts.AMDGPUToolsNumReservedVGPROpt;
+    Val.getAsInteger(0, AMDGPUToolsNumReservedVGPROpt);
+    Opts.AMDGPUToolsNumReservedVGPROpt = AMDGPUToolsNumReservedVGPROpt;
+  }
+  if (Arg *A = Args.getLastArg(OPT_amdgpu_tools_num_reserved_sgpr)) {
+    StringRef Val = A->getValue();
+    unsigned AMDGPUToolsNumReservedSGPROpt = Opts.AMDGPUToolsNumReservedSGPROpt;
+    Val.getAsInteger(0, AMDGPUToolsNumReservedSGPROpt);
+    Opts.AMDGPUToolsNumReservedSGPROpt = AMDGPUToolsNumReservedSGPROpt;
+  }
+
   if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
     StringRef Name = A->getValue();
     unsigned Method = llvm::StringSwitch<unsigned>(Name)
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -4813,6 +4813,26 @@
       CmdArgs.push_back("-mstack-probe-size=0");
   }
 
+  // Translate AMDGPU Tools arguments.
+  if (Args.hasArg(options::OPT_amdgpu_tools_insert_nops))
+    CmdArgs.push_back("--amdgpu-tools-insert-nops");
+  if (Args.hasArg(options::OPT_amdgpu_tools_num_reserved_vgpr)) {
+    StringRef Size =
+      Args.getLastArgValue(options::OPT_amdgpu_tools_num_reserved_vgpr);
+
+    if (!Size.empty())
+      CmdArgs.push_back(
+        Args.MakeArgString("--amdgpu-tools-num-reserved-vgpr=" + Size));
+  }
+  if (Args.hasArg(options::OPT_amdgpu_tools_num_reserved_sgpr)) {
+    StringRef Size =
+      Args.getLastArgValue(options::OPT_amdgpu_tools_num_reserved_sgpr);
+
+    if (!Size.empty())
+      CmdArgs.push_back(
+        Args.MakeArgString("--amdgpu-tools-num-reserved-sgpr=" + Size));
+  }
+
   switch (getToolChain().getArch()) {
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be:
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -6589,14 +6589,34 @@
     llvm::Function *F = cast<llvm::Function>(GV);
     uint32_t NumVGPR = Attr->getNumVGPR();
     if (NumVGPR != 0)
-      F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
+      F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumVGPR));
   }
 
   if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
     llvm::Function *F = cast<llvm::Function>(GV);
     unsigned NumSGPR = Attr->getNumSGPR();
     if (NumSGPR != 0)
-      F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
+      F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumSGPR));
+  }
+
+  if (const auto Attr = FD->getAttr<AMDGPUToolsInsertNopsAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    if (!F->hasFnAttribute(Attr->getSpelling()))
+      F->addFnAttr(Attr->getSpelling());
+  }
+
+  if (const auto Attr = FD->getAttr<AMDGPUToolsNumReservedVGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    unsigned NumReservedVGPR = Attr->getNumReservedVGPR();
+    if (!F->hasFnAttribute(Attr->getSpelling()) && NumReservedVGPR != 0)
+      F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumReservedVGPR));
+  }
+
+  if (const auto Attr = FD->getAttr<AMDGPUToolsNumReservedSGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    unsigned NumReservedSGPR = Attr->getNumReservedSGPR();
+    if (!F->hasFnAttribute(Attr->getSpelling()) && NumReservedSGPR != 0)
+      F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumReservedSGPR));
   }
 }
 
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1595,6 +1595,20 @@
     }
   }
 
+  if (getTarget().getTriple().getArch() == llvm::Triple::amdgcn) {
+    // Add AMDGPU Tools attributes if needed.
+    if (CodeGenOpts.AMDGPUToolsInsertNopsOpt)
+      FuncAttrs.addAttribute("amdgpu_tools_insert_nops");
+    if (CodeGenOpts.AMDGPUToolsNumReservedVGPROpt)
+      FuncAttrs.addAttribute(
+        "amdgpu_tools_num_reserved_vgpr",
+        llvm::utostr(CodeGenOpts.AMDGPUToolsNumReservedVGPROpt));
+    if (CodeGenOpts.AMDGPUToolsNumReservedSGPROpt)
+      FuncAttrs.addAttribute(
+        "amdgpu_tools_num_reserved_sgpr",
+        llvm::utostr(CodeGenOpts.AMDGPUToolsNumReservedSGPROpt));
+  }
+
   if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
     // Conservatively, mark all functions and calls in CUDA as convergent
     // (meaning, they may call an intrinsically convergent op, such as
Index: include/clang/Frontend/CodeGenOptions.def
===================================================================
--- include/clang/Frontend/CodeGenOptions.def
+++ include/clang/Frontend/CodeGenOptions.def
@@ -167,6 +167,16 @@
                                             ///< alignment, if not 0.
 VALUE_CODEGENOPT(StackProbeSize    , 32, 4096) ///< Overrides default stack
                                                ///< probe size, even if 0.
+
+/// \brief Control setting of ``amdgpu_tools_insert_nops`` attribute
+CODEGENOPT(AMDGPUToolsInsertNopsOpt, 1, 0)
+/// \brief Control setting of ``amdgpu_tools_num_reserved_vgpr(<num>)``
+/// attribute
+VALUE_CODEGENOPT(AMDGPUToolsNumReservedVGPROpt, 32, 0)
+/// \brief Control setting of ``amdgpu_tools_num_reserved_sgpr(<num>)``
+/// attribute
+VALUE_CODEGENOPT(AMDGPUToolsNumReservedSGPROpt, 32, 0)
+
 CODEGENOPT(DebugColumnInfo, 1, 0) ///< Whether or not to use column information
                                   ///< in debug info.
 
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -352,6 +352,17 @@
 def Z_Joined : Joined<["-"], "Z">;
 def all__load : Flag<["-"], "all_load">;
 def allowable__client : Separate<["-"], "allowable_client">;
+def amdgpu_tools_insert_nops :
+  Flag<["--"], "amdgpu-tools-insert-nops">, Flags<[CC1Option, HelpHidden]>,
+  HelpText<"Insert two nop instructions for each high level source statement">;
+def amdgpu_tools_num_reserved_vgpr :
+  Joined<["--"], "amdgpu-tools-num-reserved-vgpr=">,
+  Flags<[CC1Option, HelpHidden]>,
+  HelpText<"Reserve <num> vector registers">, MetaVarName<"<num>">;
+def amdgpu_tools_num_reserved_sgpr :
+  Joined<["--"], "amdgpu-tools-num-reserved-sgpr=">,
+  Flags<[CC1Option, HelpHidden]>,
+  HelpText<"Reserve <num> scalar registers">, MetaVarName<"<num>">;
 def ansi : Flag<["-", "--"], "ansi">;
 def arch__errors__fatal : Flag<["-"], "arch_errors_fatal">;
 def arch : Separate<["-"], "arch">, Flags<[DriverOption]>;
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -936,6 +936,65 @@
 }];
 }
 
+def DocCatAMDGPUToolsAttributes :
+  DocumentationCategory<"AMD GPU Tools Attributes"> {
+  let Content = [{
+
+Clang supports the following attributes for tools such as debuggers and profilers:
+  }];
+}
+
+def AMDGPUToolsInsertNopsDocs : Documentation {
+  let Category = DocCatAMDGPUToolsAttributes;
+  let Content = [{
+
+Clang supports the ``__attribute__((amdgpu_tools_insert_nops))`` attribute on
+AMD Southern Islands GPUs and later. If specified, it causes the AMD GPU
+backend to insert two nop instructions for each high-level source statement:
+one nop instruction is inserted before the first ISA instruction of the
+statement, and one nop instruction is inserted after the last ISA instruction
+of the statement.
+
+In addition to specifying this attribute manually, clang can add it to every
+kernel function in the module if the ``--amdgpu-tools-insert-nops``
+command-line option is specified.
+  }];
+}
+
+def AMDGPUToolsNumReservedVGPRDocs : Documentation {
+  let Category = DocCatAMDGPUToolsAttributes;
+  let Content = [{
+
+Clang supports the ``__attribute__((amdgpu_tools_num_reserved_vgpr(<num>)))``
+attribute on AMD Southern Islands GPUs and later. If specified, it causes the
+AMD GPU backend to reserve ``<num>`` vector registers and not use them
+throughout kernel function execution. The index of the first reserved vector
+register is recorded in ``amd_kernel_code_t::reserved_vgpr_first``.
+
+In addition to specifying this attribute manually, clang can add it to every
+kernel function in the module if the ``--amdgpu-tools-num-reserved-vgpr=<num>``
+command-line option is specified. In this case, the option's ``<num>`` takes
+precedence over the value given in the attribute.
+  }];
+}
+
+def AMDGPUToolsNumReservedSGPRDocs : Documentation {
+  let Category = DocCatAMDGPUToolsAttributes;
+  let Content = [{
+
+Clang supports the ``__attribute__((amdgpu_tools_num_reserved_sgpr(<num>)))``
+attribute on AMD Southern Islands GPUs and later. If specified, it causes the
+AMD GPU backend to reserve ``<num>`` scalar registers and not use them
+throughout kernel function execution. The index of the first reserved scalar
+register is recorded in ``amd_kernel_code_t::reserved_sgpr_first``.
+
+In addition to specifying this attribute manually, clang can add it to every
+kernel function in the module if the ``--amdgpu-tools-num-reserved-sgpr=<num>``
+command-line option is specified. In this case, the option's ``<num>`` takes
+precedence over the value given in the attribute.
+  }];
+}
+
 def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
   let Content = [{
 Clang supports several different calling conventions, depending on the target
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -987,7 +987,30 @@
   let Args = [UnsignedArgument<"NumSGPR">];
   let Documentation = [AMDGPUNumSGPRDocs];
   let Subjects = SubjectList<[Function], ErrorDiag,
-                              "ExpectedKernelFunction">;
+                             "ExpectedKernelFunction">;
+}
+
+def AMDGPUToolsInsertNops : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_tools_insert_nops">];
+  let Documentation = [AMDGPUToolsInsertNopsDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag,
+                             "ExpectedKernelFunction">;
+}
+
+def AMDGPUToolsNumReservedVGPR : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_tools_num_reserved_vgpr">];
+  let Args = [UnsignedArgument<"NumReservedVGPR">];
+  let Documentation = [AMDGPUToolsNumReservedVGPRDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag,
+                             "ExpectedKernelFunction">;
+}
+
+def AMDGPUToolsNumReservedSGPR : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_tools_num_reserved_sgpr">];
+  let Args = [UnsignedArgument<"NumReservedSGPR">];
+  let Documentation = [AMDGPUToolsNumReservedSGPRDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag,
+                             "ExpectedKernelFunction">;
 }
 
 def NoSplitStack : InheritableAttr {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to