subject:"\[PATCH\] D75917\: Expose llvm fence instruction as clang intrinsic"

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-26 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG06bdffb2bb45: [AMDGPU] Expose llvm fence instruction as 
clang intrinsic (authored by saiislam, committed by sameerds).

Changed prior to commit:
  https://reviews.llvm.org/D75917?vs=260053=260207#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
  clang/test/Sema/builtin-amdgcn-fence-failure.cpp
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,14 @@
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
 }
+
+void test_fence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+  const char ptr[] = "workgroup";
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
Index: clang/test/Sema/builtin-amdgcn-fence-failure.cpp
===
--- /dev/null
+++ clang/test/Sema/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -0,0 +1,22 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN:   -triple=amdgcn-amd-amdhsa  | opt -S | FileCheck %s
+
+void test_memory_fence_success() {
+  // CHECK-LABEL: test_memory_fence_success
+
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+
+  // CHECK: fence syncscope("agent") acquire
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+
+  // CHECK: fence seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+
+  // CHECK: fence syncscope("agent") acq_rel
+  __builtin_amdgcn_fence(4, "agent");
+
+  // CHECK: fence syncscope("workgroup") release
+  __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1920,6 +1920,10 @@
 if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall))
   return ExprError();
 break;
+  case llvm::Triple::amdgcn:
+if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall))
+  return ExprError();
+break;
   default:
 break;
 }
@@ -3033,6 +3037,46 @@
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
 
+bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
+  CallExpr *TheCall) {
+  switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_fence: {
+ExprResult Arg = TheCall->getArg(0);
+auto ArgExpr = Arg.get();
+Expr::EvalResult ArgResult;
+
+if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
+  return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+ << ArgExpr->getType();
+int ord = ArgResult.Val.getInt().getZExtValue();
+
+// Check valididty of memory ordering as per C11 / C++11's memody model.
+switch (static_cast(ord)) {
+case llvm::AtomicOrderingCABI::acquire:
+case llvm::AtomicOrderingCABI::release:
+case llvm::AtomicOrderingCABI::acq_rel:
+

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-24 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 260053.
saiislam added a comment.

Updated description and added a failing test case for integer scope.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
  clang/test/Sema/builtin-amdgcn-fence-failure.cpp
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,14 @@
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
 }
+
+void test_fence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+  const char ptr[] = "workgroup";
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
Index: clang/test/Sema/builtin-amdgcn-fence-failure.cpp
===
--- /dev/null
+++ clang/test/Sema/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -0,0 +1,22 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN:   -triple=amdgcn-amd-amdhsa  | opt -S | FileCheck %s
+
+void test_memory_fence_success() {
+// CHECK-LABEL: test_memory_fence_success
+
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,  "workgroup");
+
+   // CHECK: fence syncscope("agent") acquire
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+
+  // CHECK: fence seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+
+  // CHECK: fence syncscope("agent") acq_rel
+  __builtin_amdgcn_fence(4, "agent");
+
+// CHECK: fence syncscope("workgroup") release
+  __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1920,6 +1920,10 @@
 if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall))
   return ExprError();
 break;
+  case llvm::Triple::amdgcn:
+if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall))
+  return ExprError();
+break;
   default:
 break;
 }
@@ -2921,6 +2925,45 @@
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
 
+bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_fence: {
+  ExprResult Arg = TheCall->getArg(0);
+  auto ArgExpr = Arg.get();
+  Expr::EvalResult ArgResult;
+
+  if(!ArgExpr->EvaluateAsInt(ArgResult, Context))
+return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+  << ArgExpr->getType();
+  int ord = ArgResult.Val.getInt().getZExtValue();
+
+  // Check valididty of memory ordering as per C11 / C++11's memody model.
+  switch (static_cast(ord)) {
+case llvm::AtomicOrderingCABI::acquire:
+case llvm::AtomicOrderingCABI::release:
+case llvm::AtomicOrderingCABI::acq_rel:
+case llvm::AtomicOrderingCABI::seq_cst:
+  break;
+default: {
+  return Diag(ArgExpr->getBeginLoc(),
+

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-23 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds accepted this revision.
sameerds added a comment.
This revision is now accepted and ready to land.

Thanks @saiislam ... this looks much better!

Two nitpicks, that must be fixed. But it is okay if you directly submit after 
fixing them.

1. The change description should use "const char *" in the signature and not 
"String".
2. Can you please add a test that passes an integer constant as the scope? I am 
assuming that the signature check will complain that it is not a string.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-23 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked an inline comment as done.
saiislam added inline comments.



Comment at: clang/lib/Sema/SemaChecking.cpp:2958
+  // Check that sync scope is a constant literal
+  if(!ArgExpr->EvaluateAsConstantExpr(ArgResult1,
+  Expr::EvaluateForCodeGen, Context))

@sameerds here is the check for sync scope to be constant literal


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-23 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 259545.
saiislam added a comment.

Added check and test for sync scope to be a constant literal.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
  clang/test/Sema/builtin-amdgcn-fence-failure.cpp
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,14 @@
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
 }
+
+void test_fence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+
+  const char ptr[] = "workgroup";
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
Index: clang/test/Sema/builtin-amdgcn-fence-failure.cpp
===
--- /dev/null
+++ clang/test/Sema/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -0,0 +1,22 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN:   -triple=amdgcn-amd-amdhsa  | opt -S | FileCheck %s
+
+void test_memory_fence_success() {
+// CHECK-LABEL: test_memory_fence_success
+
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,  "workgroup");
+
+   // CHECK: fence syncscope("agent") acquire
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+
+  // CHECK: fence seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+
+  // CHECK: fence syncscope("agent") acq_rel
+  __builtin_amdgcn_fence(4, "agent");
+
+// CHECK: fence syncscope("workgroup") release
+  __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1920,6 +1920,10 @@
 if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall))
   return ExprError();
 break;
+  case llvm::Triple::amdgcn:
+if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall))
+  return ExprError();
+break;
   default:
 break;
 }
@@ -2921,6 +2925,45 @@
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
 
+bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_fence: {
+  ExprResult Arg = TheCall->getArg(0);
+  auto ArgExpr = Arg.get();
+  Expr::EvalResult ArgResult;
+
+  if(!ArgExpr->EvaluateAsInt(ArgResult, Context))
+return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+  << ArgExpr->getType();
+  int ord = ArgResult.Val.getInt().getZExtValue();
+
+  // Check valididty of memory ordering as per C11 / C++11's memody model.
+  switch (static_cast(ord)) {
+case llvm::AtomicOrderingCABI::acquire:
+case llvm::AtomicOrderingCABI::release:
+case llvm::AtomicOrderingCABI::acq_rel:
+case llvm::AtomicOrderingCABI::seq_cst:
+  break;
+default: {
+  return Diag(ArgExpr->getBeginLoc(),
+diag::warn_atomic_op_has_invalid_memory_order)
+  << ArgExpr->getSourceRange();
+}
+  }
+
+  Arg = TheCall->getArg(1);
+  ArgExpr = Arg.get();
+

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-23 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 259485.
saiislam added a comment.

Removed documentation from clang doc. Squashed all changes into a single commit.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
  clang/test/Sema/builtin-amdgcn-fence-failure.cpp
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,11 @@
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
 }
+
+void test_fence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+}
Index: clang/test/Sema/builtin-amdgcn-fence-failure.cpp
===
--- /dev/null
+++ clang/test/Sema/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
\ No newline at end of file
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -0,0 +1,22 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN:   -triple=amdgcn-amd-amdhsa  | opt -instnamer -S | FileCheck %s
+
+void test_memory_fence_success() {
+// CHECK-LABEL: test_memory_fence_success
+
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,  "workgroup");
+
+   // CHECK: fence syncscope("agent") acquire
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+
+  // CHECK: fence seq_cst
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+
+  // CHECK: fence syncscope("agent") acq_rel
+  __builtin_amdgcn_fence(4, "agent");
+
+// CHECK: fence syncscope("workgroup") release
+  __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1920,6 +1920,10 @@
 if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall))
   return ExprError();
 break;
+  case llvm::Triple::amdgcn:
+if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall))
+  return ExprError();
+break;
   default:
 break;
 }
@@ -2921,6 +2925,37 @@
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
 
+bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  switch (BuiltinID) {
+case AMDGPU::BI__builtin_amdgcn_fence: {
+  ExprResult Arg = TheCall->getArg(0);
+  auto ArgExpr = Arg.get();
+  Expr::EvalResult ArgResult;
+
+  if(!ArgExpr->EvaluateAsInt(ArgResult, Context)) {
+return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+  << ArgExpr->getType();
+  }
+  int ord = ArgResult.Val.getInt().getZExtValue();
+
+  // Check valididty of memory ordering as per C11 / C++11's memody model.
+  switch (static_cast(ord)) {
+case llvm::AtomicOrderingCABI::acquire:
+case llvm::AtomicOrderingCABI::release:
+case llvm::AtomicOrderingCABI::acq_rel:
+case llvm::AtomicOrderingCABI::seq_cst:
+  break;
+default: {
+  return Diag(ArgExpr->getBeginLoc(),
+diag::warn_atomic_op_has_invalid_memory_order)
+  << ArgExpr->getSourceRange();
+}
+  }
+} break;
+  }
+  return false;
+}
+
 bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
CallExpr

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds added a comment.

Can you please submit a squashed single commit for review against the master 
branch? All the recent commits seem to be relative to previous commits, and I 
am having trouble looking at the change as a whole.

The commit description needs some fixes:

1. Remove use of Title Case, in places like "using Fence instruction" and "LLVM 
Atomic Memory Ordering" and "as a String". They are simply common nouns, not 
proper nouns. When in doubt, look at the LangRef to see how these words are 
used as common nouns.
2. Don't mention that enum values are okay too. I am sure they will always be 
okay, but it's better to encourage the use of symbols.
3. Don't mention HSA even if the AMDGPU user manual does so.
4. In fact the last two sentences in the description are not strictly necessary 
... they are the logical outcome of the scopes being target-specific strings.
5. Note that the general practice in documentation is to say "AMDGPU" which 
covers the LLVM Target, while "amdgcn" is only used in the code because it is 
one of multiple architectures in the AMDGPU backend.




Comment at: clang/docs/LanguageExtensions.rst:2458
 
+AMDGCN specific builtins
+-

We probably don't need to document the new builtin here. Clearly, we have not 
documented any other AMDGPU builtin, and there is no need to start now. If 
necessary, we can take that up as a separate task covering all builtins.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked an inline comment as done.
saiislam added inline comments.



Comment at: clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp:5
+
+void test_amdgcn_fence_failure() {
+

b-sumner wrote:
> JonChesterfield wrote:
> > arsenm wrote:
> > > Does this really depend on C++? Can it use OpenCL like the other builtin 
> > > tests?This also belongs in a Sema* test directory since it's checking an 
> > > error
> > Making it opencl-only would force some of the openmp runtime to be written 
> > in opencl, which is not presently the case. Currently that library is 
> > written in a dialect of hip, but there's a plan to implement it in openmp 
> > instead.
> > 
> > I'd much rather this builtin work from any language, instead of tying it to 
> > opencl, as that means one can use it from openmp target regions.
> I thought the question was about this test itself.  The test being in 
> CodeGenOpenCL doesn't affect whether other languages can use the builtin.  
> Why not put this test in CodeGenOpenCL alongside all of the other 
> builtins-amdgcn-*.cl ?
This test is basically relying on implementation of sync scope in the AMDGCN 
backend to check for validity, and is not testing the code generation 
capability. Doesn't it make more sense in Sema directory?

Won't putting it in SemaOpenCL or SemaOpenCLCXX indicate some kind of relation 
with OpenCL?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 259298.
saiislam added a comment.

Moved builtin-amdgcn-fence-failure.cpp from clang/test/CodeGenCXX/
to clang/test/Sema/ since it is checking an error.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
  clang/test/CodeGenHIP/builtin_memory_fence.cpp
  clang/test/Sema/builtin-amdgcn-fence-failure.cpp
  clang/test/Sema/builtins.c
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,11 @@
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
 }
+
+void test_fence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+}
Index: clang/test/Sema/builtins.c
===
--- clang/test/Sema/builtins.c
+++ clang/test/Sema/builtins.c
@@ -320,15 +320,3 @@
   // expected-error@+1 {{use of unknown builtin '__builtin_is_constant_evaluated'}}
   return __builtin_is_constant_evaluated();
 }
-
-void test_memory_fence_errors() {
-  __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
-  __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
-  __builtin_memory_fence(4); // expected-error {{too few arguments to function call, expected 2}}
-
-  __builtin_memory_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
-
-  __builtin_memory_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
-}
Index: clang/test/Sema/builtin-amdgcn-fence-failure.cpp
===
--- /dev/null
+++ clang/test/Sema/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope 
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
\ No newline at end of file
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===
--- clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,25 +1,22 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
 // RUN:   -triple=amdgcn-amd-amdhsa  | opt -instnamer -S | FileCheck %s
 
 void test_memory_fence_success() {
 // CHECK-LABEL: test_memory_fence_success
 
   // CHECK: fence syncscope("workgroup") seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,  "workgroup");
   
// CHECK: fence syncscope("agent") acquire
-  __builtin_memory_fence(__ATOMIC_ACQUIRE, "agent");
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
 
   // CHECK: fence seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST, "");
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
 
   // CHECK: fence syncscope("agent") acq_rel
-  __builtin_memory_fence(4, "agent");
+  __builtin_amdgcn_fence(4, "agent");
 
 // CHECK: fence syncscope("workgroup") release
-  __builtin_memory_fence(3, "workgroup");
-
-  // CHECK: fence syncscope("foobar") release
-  __builtin_memory_fence(3, "foobar");
-}
\ No newline at end of file
+  __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/lib/Sema/SemaChecking.cpp
===
---

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Brian Sumner via Phabricator via cfe-commits

b-sumner added inline comments.



Comment at: clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp:5
+
+void test_amdgcn_fence_failure() {
+

JonChesterfield wrote:
> arsenm wrote:
> > Does this really depend on C++? Can it use OpenCL like the other builtin 
> > tests?This also belongs in a Sema* test directory since it's checking an 
> > error
> Making it opencl-only would force some of the openmp runtime to be written in 
> opencl, which is not presently the case. Currently that library is written in 
> a dialect of hip, but there's a plan to implement it in openmp instead.
> 
> I'd much rather this builtin work from any language, instead of tying it to 
> opencl, as that means one can use it from openmp target regions.
I thought the question was about this test itself.  The test being in 
CodeGenOpenCL doesn't affect whether other languages can use the builtin.  Why 
not put this test in CodeGenOpenCL alongside all of the other 
builtins-amdgcn-*.cl ?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp:5
+
+void test_amdgcn_fence_failure() {
+

arsenm wrote:
> Does this really depend on C++? Can it use OpenCL like the other builtin 
> tests?This also belongs in a Sema* test directory since it's checking an error
Making it opencl-only would force some of the openmp runtime to be written in 
opencl, which is not presently the case. Currently that library is written in a 
dialect of hip, but there's a plan to implement it in openmp instead.

I'd much rather this builtin work from any language, instead of tying it to 
opencl, as that means one can use it from openmp target regions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Matt Arsenault via Phabricator via cfe-commits

arsenm added inline comments.



Comment at: clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp:5
+
+void test_amdgcn_fence_failure() {
+

Does this really depend on C++? Can it use OpenCL like the other builtin 
tests?This also belongs in a Sema* test directory since it's checking an error


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added a comment.

Amdgcn specific is fine by me. Hopefully that unblocks this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 259239.
saiislam added a comment.

Removed stale commented code


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
  clang/test/CodeGenHIP/builtin_memory_fence.cpp
  clang/test/Sema/builtins.c
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,11 @@
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
 }
+
+void test_fence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+}
Index: clang/test/Sema/builtins.c
===
--- clang/test/Sema/builtins.c
+++ clang/test/Sema/builtins.c
@@ -320,15 +320,3 @@
   // expected-error@+1 {{use of unknown builtin '__builtin_is_constant_evaluated'}}
   return __builtin_is_constant_evaluated();
 }
-
-void test_memory_fence_errors() {
-  __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
-  __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
-  __builtin_memory_fence(4); // expected-error {{too few arguments to function call, expected 2}}
-
-  __builtin_memory_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
-
-  __builtin_memory_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
-}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===
--- clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,25 +1,22 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
 // RUN:   -triple=amdgcn-amd-amdhsa  | opt -instnamer -S | FileCheck %s
 
 void test_memory_fence_success() {
 // CHECK-LABEL: test_memory_fence_success
 
   // CHECK: fence syncscope("workgroup") seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,  "workgroup");
   
// CHECK: fence syncscope("agent") acquire
-  __builtin_memory_fence(__ATOMIC_ACQUIRE, "agent");
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
 
   // CHECK: fence seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST, "");
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
 
   // CHECK: fence syncscope("agent") acq_rel
-  __builtin_memory_fence(4, "agent");
+  __builtin_amdgcn_fence(4, "agent");
 
 // CHECK: fence syncscope("workgroup") release
-  __builtin_memory_fence(3, "workgroup");
-
-  // CHECK: fence syncscope("foobar") release
-  __builtin_memory_fence(3, "foobar");
-}
\ No newline at end of file
+  __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
===
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope 
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
\ No newline at end of file
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1870,34

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-22 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 259238.
saiislam added a comment.
Herald added subscribers: kerbowa, nhaehnle, jvesely.

Changed the builtin to be AMDGCN-specific

It is named as __builtin_amdgcn_fence(order, scope)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
  clang/test/CodeGenHIP/builtin_memory_fence.cpp
  clang/test/Sema/builtins.c
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,11 @@
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
 }
+
+void test_fence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+}
Index: clang/test/Sema/builtins.c
===
--- clang/test/Sema/builtins.c
+++ clang/test/Sema/builtins.c
@@ -320,15 +320,3 @@
   // expected-error@+1 {{use of unknown builtin '__builtin_is_constant_evaluated'}}
   return __builtin_is_constant_evaluated();
 }
-
-void test_memory_fence_errors() {
-  __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
-  __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
-  __builtin_memory_fence(4); // expected-error {{too few arguments to function call, expected 2}}
-
-  __builtin_memory_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
-
-  __builtin_memory_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
-}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===
--- clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,25 +1,22 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
 // RUN:   -triple=amdgcn-amd-amdhsa  | opt -instnamer -S | FileCheck %s
 
 void test_memory_fence_success() {
 // CHECK-LABEL: test_memory_fence_success
 
   // CHECK: fence syncscope("workgroup") seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,  "workgroup");
   
// CHECK: fence syncscope("agent") acquire
-  __builtin_memory_fence(__ATOMIC_ACQUIRE, "agent");
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
 
   // CHECK: fence seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST, "");
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
 
   // CHECK: fence syncscope("agent") acq_rel
-  __builtin_memory_fence(4, "agent");
+  __builtin_amdgcn_fence(4, "agent");
 
 // CHECK: fence syncscope("workgroup") release
-  __builtin_memory_fence(3, "workgroup");
-
-  // CHECK: fence syncscope("foobar") release
-  __builtin_memory_fence(3, "foobar");
-}
\ No newline at end of file
+  __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
===
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope 
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
\ No newline at end of file
Index: clang/lib/Sema/SemaChecking.cpp

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-17 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added a comment.

The tests look good, but I can't see the implementation in this diff. Maybe a 
file missing from the patch? Can be hard to tell with phabricator, the error 
may be at my end.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-17 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 258368.
saiislam marked an inline comment as done.
saiislam added a comment.

1. Moved test case to clang/test/CodeGenCXX.
2. Added a failing test case with invalid sync scope, which gets detected by 
implementation of fence instruction.
3. Updated the change description of the builtin.
4. Updated the clang documentation describing mapping of C++ memory-ordering to 
LLVM memory-ordering.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/docs/LanguageExtensions.rst
  clang/test/CodeGenCXX/builtin-memory-fence-failure.cpp
  clang/test/CodeGenCXX/builtin-memory-fence.cpp
  clang/test/CodeGenHIP/builtin_memory_fence.cpp

Index: clang/test/CodeGenHIP/builtin_memory_fence.cpp
===
--- /dev/null
+++ clang/test/CodeGenHIP/builtin_memory_fence.cpp
@@ -1,25 +0,0 @@
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \
-// RUN:   -triple=amdgcn-amd-amdhsa  | opt -instnamer -S | FileCheck %s
-
-void test_memory_fence_success() {
-// CHECK-LABEL: test_memory_fence_success
-
-  // CHECK: fence syncscope("workgroup") seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
-  
-   // CHECK: fence syncscope("agent") acquire
-  __builtin_memory_fence(__ATOMIC_ACQUIRE, "agent");
-
-  // CHECK: fence seq_cst
-  __builtin_memory_fence(__ATOMIC_SEQ_CST, "");
-
-  // CHECK: fence syncscope("agent") acq_rel
-  __builtin_memory_fence(4, "agent");
-
-// CHECK: fence syncscope("workgroup") release
-  __builtin_memory_fence(3, "workgroup");
-
-  // CHECK: fence syncscope("foobar") release
-  __builtin_memory_fence(3, "foobar");
-}
\ No newline at end of file
Index: clang/test/CodeGenCXX/builtin-memory-fence-failure.cpp
===
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-memory-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN:   -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_memory_fence_failure() {
+
+  // CHECK: error: Unsupported atomic synchronization scope 
+  __builtin_memory_fence(__ATOMIC_SEQ_CST, "foobar");
+}
\ No newline at end of file
Index: clang/docs/LanguageExtensions.rst
===
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -2455,6 +2455,59 @@
 and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
 corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)
 
+``__builtin_memory_fence``
+-
+
+``__builtin_memory_fence`` allows using `Fence instruction `_ 
+from clang. It takes C++11 compatible memory-ordering and target-specific
+sync-scope as arguments, and generates a fence instruction in the IR.
+
+**Syntax**:
+
+.. code-block:: c++
+
+__builtin_memory_fence(unsigned int memory_ordering, String sync_scope)
+
+**Example of use**:
+
+.. code-block:: c++
+
+  void my_fence(int i) {
+i++;
+__builtin_memory_fence(__ATOMIC_ACQUIRE,  "workgroup");
+i--;
+__builtin_memory_fence(__ATOMIC_SEQ_CST,  "agent");
+  }
+
+**Description**:
+
+The first argument of ``__builtin_memory_fence()`` builtin is one of the
+memory-ordering specifiers ``__ATOMIC_ACQUIRE``, ``__ATOMIC_RELEASE``,
+``__ATOMIC_ACQ_REL``, or ``__ATOMIC_SEQ_CST`` following C++11 memory model
+semantics. Equivalent enum values of these memory-ordering can also be 
+specified. The builtin maps these C++ memory-ordering to corresponding
+LLVM Atomic Memory Ordering for the fence instruction using LLVM Atomic C
+ABI, as given in the table below. The second argument is a target-specific
+synchronization scope defined as a String. This builtin transparently
+passes the second argument to fence instruction and relies on target
+implementation for validity check.
+
++--++
+| Input in clang   | Output in IR   |
+| (C++11 Memory-ordering)  | (LLVM Atomic Memory-ordering)  |
++==+===++===+
+| Enum | Value | Enum   | Value |
++--+---++---+
+| ``__ATOMIC_ACQUIRE`` | 2 | Acquire| 4 |
++--+---++---+
+| ``__ATOMIC_RELEASE`` | 3 | Release| 5 |
++--+---++---+
+| ``__ATOMIC_ACQ_REL`` | 4 | AcquireRelease | 6 |
++--+---++---+
+| ``__ATOMIC_SEQ_CST`` | 5 | SequentiallyConsistent | 7 |

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-08 Thread Brian Sumner via Phabricator via cfe-commits

b-sumner added a comment.

In addition to predefining `__ATOMIC_RELAXED`, etc., clang also predefines 
`__OPENCL_MEMORY_SCOPE_WORK_ITEM` and friends.  So it doesn't really seem 
unreasonable for clang to also predefine its known syncscopes, and to require 
the argument to be one of those integers.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds added inline comments.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

JonChesterfield wrote:
> JonChesterfield wrote:
> > sameerds wrote:
> > > JonChesterfield wrote:
> > > > sameerds wrote:
> > > > > JonChesterfield wrote:
> > > > > > saiislam wrote:
> > > > > > > sameerds wrote:
> > > > > > > > Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory 
> > > > > > > > models. They should not be used with the new builtin because 
> > > > > > > > this new builtin does not follow any specific language model. 
> > > > > > > > For user convenience, the right thing to do is to introduce new 
> > > > > > > > tokens in the Clang preprocessor, similar to the `__ATOMIC_*` 
> > > > > > > > tokens. The convenient shortcut is to just tell the user to 
> > > > > > > > supply numerical values by looking at the LLVM source code.
> > > > > > > > 
> > > > > > > > From llvm/Support/AtomicOrdering.h, note how the numerical 
> > > > > > > > value for `__ATOMIC_SEQ_CST` is 5, but the numerical value for 
> > > > > > > > the LLVM SequentiallyConsistent ordering is 7. The numerical 
> > > > > > > > value 5 refers to the LLVM ordering "release". So, if the 
> > > > > > > > implementation were correct, this line should result in the 
> > > > > > > > following unexpected LLVM IR:
> > > > > > > >   fence syncscope("workgroup") release
> > > > > > > As you pointed out, the range of acquire to sequentiallly 
> > > > > > > consistent memory orders for llvm::AtomicOrdering is [4, 7], 
> > > > > > > while for llvm::AtomicOrderingCABI is [2, 5]. Enums of C ABI was 
> > > > > > > taken to ensure easy of use for the users who are familiar with 
> > > > > > > C/C++ standard memory model. It allows them to use macros like 
> > > > > > > __ATOMIC_ACQUIRE etc.
> > > > > > > Clang CodeGen of the builtin internally maps C ABI ordering to 
> > > > > > > llvm atomic ordering.
> > > > > > What language, implemented in clang, do you have in mind that 
> > > > > > reusing the existing __ATOMIC_* macros would be incorrect for?
> > > > > I think we agreed that this builtin exposes the LLVM fence exactly. 
> > > > > That would mean it takes arguments defined by LLVM. If you are 
> > > > > implementing something different from that, then it first needs to be 
> > > > > specified properly. Perhaps you could say that this is a C ABI 
> > > > > compatible builtin, that happens to take target specific scopes? That 
> > > > > should cover OpenCL whose scope enum is designed to be compatible 
> > > > > with C.
> > > > > 
> > > > > Whatever it is that you are trying to implement here, it definitely 
> > > > > does not expose a raw LLVM fence.
> > > > The llvm fence, in text form, uses a symbol for the memory scope. Not 
> > > > an enum.
> > > > 
> > > > This symbol is set using these macros for the existing atomic builtins. 
> > > > Using an implementation detail of clang instead is strictly worse, by 
> > > > layering and by precedent.
> > > > 
> > > > ABI is not involved here. Nor is OpenCl.
> > > The `__ATOMIC_*` symbols in Clang quite literally represent the C/C++ 
> > > ABI. See the details in AtomicOrdering.h and InitPreprocessor.cpp. I am 
> > > not opposed to specifying that the builtin expects these symbols, but 
> > > then it is incorrect to say that the builtin exposes the raw LLVM 
> > > builtin. It is a C-ABI-compatible builtin that happens to take 
> > > target-specific scope as a string argument. And that would also make it 
> > > an overload of the already existing builting __atomic_fence().
> > I don't know what you mean by "raw",  but am guessing you're asking for 
> > documentation for the intrinsic. Said documentation should indeed be added 
> > for this builtin - it'll probably be in a tablegen file.
> I will try to stop using builtin and intrinsic as synonyms.
Right. It's actually an LLVM instruction, not even an intrinsic. I am guilty of 
using the wrong term quite often, but usually the context helps. I think the 
following is still needed:

  # A testcase that exercises the builtin with an invalid string argument for 
scope.
  # An update to the change description describing what is being introduced 
here. It is more than a direct mapping from builtin to instruction. The C ABI 
is involved.
  # An update to the Clang documentation describing this new builtin: 
https://clang.llvm.org/docs/LanguageExtensions.html#builtin-functions



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:1
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \

Codegen test should be under CodeGen and/or CodeGenCXX


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

JonChesterfield wrote:
> sameerds wrote:
> > JonChesterfield wrote:
> > > sameerds wrote:
> > > > JonChesterfield wrote:
> > > > > saiislam wrote:
> > > > > > sameerds wrote:
> > > > > > > Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory 
> > > > > > > models. They should not be used with the new builtin because this 
> > > > > > > new builtin does not follow any specific language model. For user 
> > > > > > > convenience, the right thing to do is to introduce new tokens in 
> > > > > > > the Clang preprocessor, similar to the `__ATOMIC_*` tokens. The 
> > > > > > > convenient shortcut is to just tell the user to supply numerical 
> > > > > > > values by looking at the LLVM source code.
> > > > > > > 
> > > > > > > From llvm/Support/AtomicOrdering.h, note how the numerical value 
> > > > > > > for `__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
> > > > > > > SequentiallyConsistent ordering is 7. The numerical value 5 
> > > > > > > refers to the LLVM ordering "release". So, if the implementation 
> > > > > > > were correct, this line should result in the following unexpected 
> > > > > > > LLVM IR:
> > > > > > >   fence syncscope("workgroup") release
> > > > > > As you pointed out, the range of acquire to sequentiallly 
> > > > > > consistent memory orders for llvm::AtomicOrdering is [4, 7], while 
> > > > > > for llvm::AtomicOrderingCABI is [2, 5]. Enums of C ABI was taken to 
> > > > > > ensure easy of use for the users who are familiar with C/C++ 
> > > > > > standard memory model. It allows them to use macros like 
> > > > > > __ATOMIC_ACQUIRE etc.
> > > > > > Clang CodeGen of the builtin internally maps C ABI ordering to llvm 
> > > > > > atomic ordering.
> > > > > What language, implemented in clang, do you have in mind that reusing 
> > > > > the existing __ATOMIC_* macros would be incorrect for?
> > > > I think we agreed that this builtin exposes the LLVM fence exactly. 
> > > > That would mean it takes arguments defined by LLVM. If you are 
> > > > implementing something different from that, then it first needs to be 
> > > > specified properly. Perhaps you could say that this is a C ABI 
> > > > compatible builtin, that happens to take target specific scopes? That 
> > > > should cover OpenCL whose scope enum is designed to be compatible with 
> > > > C.
> > > > 
> > > > Whatever it is that you are trying to implement here, it definitely 
> > > > does not expose a raw LLVM fence.
> > > The llvm fence, in text form, uses a symbol for the memory scope. Not an 
> > > enum.
> > > 
> > > This symbol is set using these macros for the existing atomic builtins. 
> > > Using an implementation detail of clang instead is strictly worse, by 
> > > layering and by precedent.
> > > 
> > > ABI is not involved here. Nor is OpenCl.
> > The `__ATOMIC_*` symbols in Clang quite literally represent the C/C++ ABI. 
> > See the details in AtomicOrdering.h and InitPreprocessor.cpp. I am not 
> > opposed to specifying that the builtin expects these symbols, but then it 
> > is incorrect to say that the builtin exposes the raw LLVM builtin. It is a 
> > C-ABI-compatible builtin that happens to take target-specific scope as a 
> > string argument. And that would also make it an overload of the already 
> > existing builting __atomic_fence().
> I don't know what you mean by "raw",  but am guessing you're asking for 
> documentation for the intrinsic. Said documentation should indeed be added 
> for this builtin - it'll probably be in a tablegen file.
I will try to stop using builtin and intrinsic as synonyms.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

sameerds wrote:
> JonChesterfield wrote:
> > sameerds wrote:
> > > JonChesterfield wrote:
> > > > saiislam wrote:
> > > > > sameerds wrote:
> > > > > > Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory 
> > > > > > models. They should not be used with the new builtin because this 
> > > > > > new builtin does not follow any specific language model. For user 
> > > > > > convenience, the right thing to do is to introduce new tokens in 
> > > > > > the Clang preprocessor, similar to the `__ATOMIC_*` tokens. The 
> > > > > > convenient shortcut is to just tell the user to supply numerical 
> > > > > > values by looking at the LLVM source code.
> > > > > > 
> > > > > > From llvm/Support/AtomicOrdering.h, note how the numerical value 
> > > > > > for `__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
> > > > > > SequentiallyConsistent ordering is 7. The numerical value 5 refers 
> > > > > > to the LLVM ordering "release". So, if the implementation were 
> > > > > > correct, this line should result in the following unexpected LLVM 
> > > > > > IR:
> > > > > >   fence syncscope("workgroup") release
> > > > > As you pointed out, the range of acquire to sequentiallly consistent 
> > > > > memory orders for llvm::AtomicOrdering is [4, 7], while for 
> > > > > llvm::AtomicOrderingCABI is [2, 5]. Enums of C ABI was taken to 
> > > > > ensure easy of use for the users who are familiar with C/C++ standard 
> > > > > memory model. It allows them to use macros like __ATOMIC_ACQUIRE etc.
> > > > > Clang CodeGen of the builtin internally maps C ABI ordering to llvm 
> > > > > atomic ordering.
> > > > What language, implemented in clang, do you have in mind that reusing 
> > > > the existing __ATOMIC_* macros would be incorrect for?
> > > I think we agreed that this builtin exposes the LLVM fence exactly. That 
> > > would mean it takes arguments defined by LLVM. If you are implementing 
> > > something different from that, then it first needs to be specified 
> > > properly. Perhaps you could say that this is a C ABI compatible builtin, 
> > > that happens to take target specific scopes? That should cover OpenCL 
> > > whose scope enum is designed to be compatible with C.
> > > 
> > > Whatever it is that you are trying to implement here, it definitely does 
> > > not expose a raw LLVM fence.
> > The llvm fence, in text form, uses a symbol for the memory scope. Not an 
> > enum.
> > 
> > This symbol is set using these macros for the existing atomic builtins. 
> > Using an implementation detail of clang instead is strictly worse, by 
> > layering and by precedent.
> > 
> > ABI is not involved here. Nor is OpenCl.
> The `__ATOMIC_*` symbols in Clang quite literally represent the C/C++ ABI. 
> See the details in AtomicOrdering.h and InitPreprocessor.cpp. I am not 
> opposed to specifying that the builtin expects these symbols, but then it is 
> incorrect to say that the builtin exposes the raw LLVM builtin. It is a 
> C-ABI-compatible builtin that happens to take target-specific scope as a 
> string argument. And that would also make it an overload of the already 
> existing builting __atomic_fence().
I don't know what you mean by "raw",  but am guessing you're asking for 
documentation for the intrinsic. Said documentation should indeed be added for 
this builtin - it'll probably be in a tablegen file.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds requested changes to this revision.
sameerds added inline comments.
This revision now requires changes to proceed.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:13651
+  llvm::getConstantStringInfo(Scope, scp);
+  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+

JonChesterfield wrote:
> sameerds wrote:
> > saiislam wrote:
> > > sameerds wrote:
> > > > This seems to be creating a new ID for any arbitrary string passed as 
> > > > sync scope. This should be validated against 
> > > > LLVMContext::getSyncScopeNames(). 
> > > As the FE is not aware about specific target and implementation of sync 
> > > scope for that target, getSyncScopeNames() here returns llvm'sdefault 
> > > sync scopes, which only supports singlethreaded and system as valid 
> > > scopes. Validity checking of memory scope string is being intentionally 
> > > left for the later stages which deal with the generated IR.
> > That's pretty strange. At this point, Clang should know what the target is, 
> > and it should have a chance to update the list of sync scopes somewhere. 
> > @arsenm, do you see a way around this?
> There is already sufficient IR level checking for the string at the 
> instruction level. Warning in clang as well could be a nicer user experience, 
> but that seems low priority to me.
If there is some checking happening anywhere, then that needs to be 
demonstrated in a testcase where the input high-level program passes an illegal 
string as the scope argument.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

JonChesterfield wrote:
> sameerds wrote:
> > JonChesterfield wrote:
> > > saiislam wrote:
> > > > sameerds wrote:
> > > > > Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory 
> > > > > models. They should not be used with the new builtin because this new 
> > > > > builtin does not follow any specific language model. For user 
> > > > > convenience, the right thing to do is to introduce new tokens in the 
> > > > > Clang preprocessor, similar to the `__ATOMIC_*` tokens. The 
> > > > > convenient shortcut is to just tell the user to supply numerical 
> > > > > values by looking at the LLVM source code.
> > > > > 
> > > > > From llvm/Support/AtomicOrdering.h, note how the numerical value for 
> > > > > `__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
> > > > > SequentiallyConsistent ordering is 7. The numerical value 5 refers to 
> > > > > the LLVM ordering "release". So, if the implementation were correct, 
> > > > > this line should result in the following unexpected LLVM IR:
> > > > >   fence syncscope("workgroup") release
> > > > As you pointed out, the range of acquire to sequentiallly consistent 
> > > > memory orders for llvm::AtomicOrdering is [4, 7], while for 
> > > > llvm::AtomicOrderingCABI is [2, 5]. Enums of C ABI was taken to ensure 
> > > > easy of use for the users who are familiar with C/C++ standard memory 
> > > > model. It allows them to use macros like __ATOMIC_ACQUIRE etc.
> > > > Clang CodeGen of the builtin internally maps C ABI ordering to llvm 
> > > > atomic ordering.
> > > What language, implemented in clang, do you have in mind that reusing the 
> > > existing __ATOMIC_* macros would be incorrect for?
> > I think we agreed that this builtin exposes the LLVM fence exactly. That 
> > would mean it takes arguments defined by LLVM. If you are implementing 
> > something different from that, then it first needs to be specified 
> > properly. Perhaps you could say that this is a C ABI compatible builtin, 
> > that happens to take target specific scopes? That should cover OpenCL whose 
> > scope enum is designed to be compatible with C.
> > 
> > Whatever it is that you are trying to implement here, it definitely does 
> > not expose a raw LLVM fence.
> The llvm fence, in text form, uses a symbol for the memory scope. Not an enum.
> 
> This symbol is set using these macros for the existing atomic builtins. Using 
> an implementation detail of clang instead is strictly worse, by layering and 
> by precedent.
> 
> ABI is not involved here. Nor is OpenCl.
The `__ATOMIC_*` symbols in Clang quite literally represent the C/C++ ABI. See 
the details in AtomicOrdering.h and InitPreprocessor.cpp. I am not opposed to 
specifying that the builtin expects these symbols, but then it is incorrect to 
say that the builtin exposes the raw LLVM builtin. It is a C-ABI-compatible 
builtin that happens to take target-specific scope as a string argument. And 
that would also make it an overload of the already existing builting 
__atomic_fence().


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:13651
+  llvm::getConstantStringInfo(Scope, scp);
+  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+

sameerds wrote:
> saiislam wrote:
> > sameerds wrote:
> > > This seems to be creating a new ID for any arbitrary string passed as 
> > > sync scope. This should be validated against 
> > > LLVMContext::getSyncScopeNames(). 
> > As the FE is not aware about specific target and implementation of sync 
> > scope for that target, getSyncScopeNames() here returns llvm'sdefault sync 
> > scopes, which only supports singlethreaded and system as valid scopes. 
> > Validity checking of memory scope string is being intentionally left for 
> > the later stages which deal with the generated IR.
> That's pretty strange. At this point, Clang should know what the target is, 
> and it should have a chance to update the list of sync scopes somewhere. 
> @arsenm, do you see a way around this?
There is already sufficient IR level checking for the string at the instruction 
level. Warning in clang as well could be a nicer user experience, but that 
seems low priority to me.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

sameerds wrote:
> JonChesterfield wrote:
> > saiislam wrote:
> > > sameerds wrote:
> > > > Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory models. 
> > > > They should not be used with the new builtin because this new builtin 
> > > > does not follow any specific language model. For user convenience, the 
> > > > right thing to do is to introduce new tokens in the Clang preprocessor, 
> > > > similar to the `__ATOMIC_*` tokens. The convenient shortcut is to just 
> > > > tell the user to supply numerical values by looking at the LLVM source 
> > > > code.
> > > > 
> > > > From llvm/Support/AtomicOrdering.h, note how the numerical value for 
> > > > `__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
> > > > SequentiallyConsistent ordering is 7. The numerical value 5 refers to 
> > > > the LLVM ordering "release". So, if the implementation were correct, 
> > > > this line should result in the following unexpected LLVM IR:
> > > >   fence syncscope("workgroup") release
> > > As you pointed out, the range of acquire to sequentiallly consistent 
> > > memory orders for llvm::AtomicOrdering is [4, 7], while for 
> > > llvm::AtomicOrderingCABI is [2, 5]. Enums of C ABI was taken to ensure 
> > > easy of use for the users who are familiar with C/C++ standard memory 
> > > model. It allows them to use macros like __ATOMIC_ACQUIRE etc.
> > > Clang CodeGen of the builtin internally maps C ABI ordering to llvm 
> > > atomic ordering.
> > What language, implemented in clang, do you have in mind that reusing the 
> > existing __ATOMIC_* macros would be incorrect for?
> I think we agreed that this builtin exposes the LLVM fence exactly. That 
> would mean it takes arguments defined by LLVM. If you are implementing 
> something different from that, then it first needs to be specified properly. 
> Perhaps you could say that this is a C ABI compatible builtin, that happens 
> to take target specific scopes? That should cover OpenCL whose scope enum is 
> designed to be compatible with C.
> 
> Whatever it is that you are trying to implement here, it definitely does not 
> expose a raw LLVM fence.
The llvm fence, in text form, uses a symbol for the memory scope. Not an enum.

This symbol is set using these macros for the existing atomic builtins. Using 
an implementation detail of clang instead is strictly worse, by layering and by 
precedent.

ABI is not involved here. Nor is OpenCl.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:13651
+  llvm::getConstantStringInfo(Scope, scp);
+  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+

saiislam wrote:
> sameerds wrote:
> > This seems to be creating a new ID for any arbitrary string passed as sync 
> > scope. This should be validated against LLVMContext::getSyncScopeNames(). 
> As the FE is not aware about specific target and implementation of sync scope 
> for that target, getSyncScopeNames() here returns llvm'sdefault sync scopes, 
> which only supports singlethreaded and system as valid scopes. Validity 
> checking of memory scope string is being intentionally left for the later 
> stages which deal with the generated IR.
That's pretty strange. At this point, Clang should know what the target is, and 
it should have a chance to update the list of sync scopes somewhere. @arsenm, 
do you see a way around this?



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

JonChesterfield wrote:
> saiislam wrote:
> > sameerds wrote:
> > > Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory models. 
> > > They should not be used with the new builtin because this new builtin 
> > > does not follow any specific language model. For user convenience, the 
> > > right thing to do is to introduce new tokens in the Clang preprocessor, 
> > > similar to the `__ATOMIC_*` tokens. The convenient shortcut is to just 
> > > tell the user to supply numerical values by looking at the LLVM source 
> > > code.
> > > 
> > > From llvm/Support/AtomicOrdering.h, note how the numerical value for 
> > > `__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
> > > SequentiallyConsistent ordering is 7. The numerical value 5 refers to the 
> > > LLVM ordering "release". So, if the implementation were correct, this 
> > > line should result in the following unexpected LLVM IR:
> > >   fence syncscope("workgroup") release
> > As you pointed out, the range of acquire to sequentiallly consistent memory 
> > orders for llvm::AtomicOrdering is [4, 7], while for 
> > llvm::AtomicOrderingCABI is [2, 5]. Enums of C ABI was taken to ensure easy 
> > of use for the users who are familiar with C/C++ standard memory model. It 
> > allows them to use macros like __ATOMIC_ACQUIRE etc.
> > Clang CodeGen of the builtin internally maps C ABI ordering to llvm atomic 
> > ordering.
> What language, implemented in clang, do you have in mind that reusing the 
> existing __ATOMIC_* macros would be incorrect for?
I think we agreed that this builtin exposes the LLVM fence exactly. That would 
mean it takes arguments defined by LLVM. If you are implementing something 
different from that, then it first needs to be specified properly. Perhaps you 
could say that this is a C ABI compatible builtin, that happens to take target 
specific scopes? That should cover OpenCL whose scope enum is designed to be 
compatible with C.

Whatever it is that you are trying to implement here, it definitely does not 
expose a raw LLVM fence.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 255288.
saiislam added a comment.

Changes:

1. Moved builtin definition with rest of atomic builtins
2. Updated validity checking of memory order using exact mathches instead of 
range matching
3. Added a sucessful test case which passes arbitrary string scope
4. Corrected formatting


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/include/clang/Basic/Builtins.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenHIP/builtin_memory_fence.cpp
  clang/test/Sema/builtins.c

Index: clang/test/Sema/builtins.c
===
--- clang/test/Sema/builtins.c
+++ clang/test/Sema/builtins.c
@@ -322,13 +322,13 @@
 }
 
 void test_memory_fence_errors() {
-  __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup");  // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
 
-  __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup");  // expected-warning {{memory order argument to atomic operation is invalid}}
+  __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
 
-  __builtin_memory_fence(4);  // expected-error {{too few arguments to function call, expected 2}}
+  __builtin_memory_fence(4); // expected-error {{too few arguments to function call, expected 2}}
 
-  __builtin_memory_fence(4, 4, 4);  // expected-error {{too many arguments to function call, expected 2}}
+  __builtin_memory_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
 
-  __builtin_memory_fence(3.14, "");  // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+  __builtin_memory_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
 }
Index: clang/test/CodeGenHIP/builtin_memory_fence.cpp
===
--- clang/test/CodeGenHIP/builtin_memory_fence.cpp
+++ clang/test/CodeGenHIP/builtin_memory_fence.cpp
@@ -19,4 +19,7 @@
 
 // CHECK: fence syncscope("workgroup") release
   __builtin_memory_fence(3, "workgroup");
+
+  // CHECK: fence syncscope("foobar") release
+  __builtin_memory_fence(3, "foobar");
 }
\ No newline at end of file
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1869,8 +1869,7 @@
   ? "__builtin_return_address"
   : "__builtin_frame_address")
   << TheCall->getSourceRange();
-  }
-break;
+  } break;
 
   case Builtin::BI__builtin_memory_fence: {
 ExprResult Arg = TheCall->getArg(0);
@@ -1878,22 +1877,27 @@
 Expr::EvalResult ArgResult;
 
 if(!ArgExpr->EvaluateAsInt(ArgResult, Context)) {
-  Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int) <<
-   ArgExpr->getType();
+  Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+<< ArgExpr->getType();
   return ExprError();
 }
 int ord = ArgResult.Val.getInt().getZExtValue();
 
 // Check valididty of memory ordering as per C11 / C++11's memody model.
-if (ord < static_cast(llvm::AtomicOrderingCABI::acquire) ||
-  ord > static_cast(llvm::AtomicOrderingCABI::seq_cst)) {
-  Diag(ArgExpr->getBeginLoc(),
-  diag::warn_atomic_op_has_invalid_memory_order)
-  << ArgExpr->getSourceRange();
-  return ExprError();
-}
+switch (static_cast(ord)) {
+  case llvm::AtomicOrderingCABI::acquire:
+  case llvm::AtomicOrderingCABI::release:
+  case llvm::AtomicOrderingCABI::acq_rel:
+  case llvm::AtomicOrderingCABI::seq_cst:
+break;
+  default: {
+Diag(ArgExpr->getBeginLoc(),
+diag::warn_atomic_op_has_invalid_memory_order)
+  << ArgExpr->getSourceRange();
+return ExprError();
+  }
 }
-break;
+} break;
   }
 
   // Since the target specific builtins for each arch overlap, only check those
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -13624,7 +13624,7 @@
 Value *Order = EmitScalarExpr(E->getArg(0));
 Value *Scope = EmitScalarExpr(E->getArg(1));
 
-if ( isa(Order)) {
+if (isa(Order)) {
   int ord = cast(Order)->getZExtValue();
 
   // Map C11/C++11 memory ordering to LLVM memory ordering
Index: clang/include/clang/Basic/Builtins.def
===
---

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/lib/Sema/SemaChecking.cpp:1888
+// Check valididty of memory ordering as per C11 / C++11's memody model.
+if (ord < static_cast(llvm::AtomicOrderingCABI::acquire) ||
+  ord > static_cast(llvm::AtomicOrderingCABI::seq_cst)) {

I think I'd write this as a switch over the enum instead of a ranged compare.

It'll codegen to the same thing, but we'll get warnings if more values are 
introduced to the enum and things will keep working (here, anyway) if the 
values are reordered.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

saiislam wrote:
> sameerds wrote:
> > Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory models. They 
> > should not be used with the new builtin because this new builtin does not 
> > follow any specific language model. For user convenience, the right thing 
> > to do is to introduce new tokens in the Clang preprocessor, similar to the 
> > `__ATOMIC_*` tokens. The convenient shortcut is to just tell the user to 
> > supply numerical values by looking at the LLVM source code.
> > 
> > From llvm/Support/AtomicOrdering.h, note how the numerical value for 
> > `__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
> > SequentiallyConsistent ordering is 7. The numerical value 5 refers to the 
> > LLVM ordering "release". So, if the implementation were correct, this line 
> > should result in the following unexpected LLVM IR:
> >   fence syncscope("workgroup") release
> As you pointed out, the range of acquire to sequentiallly consistent memory 
> orders for llvm::AtomicOrdering is [4, 7], while for llvm::AtomicOrderingCABI 
> is [2, 5]. Enums of C ABI was taken to ensure easy of use for the users who 
> are familiar with C/C++ standard memory model. It allows them to use macros 
> like __ATOMIC_ACQUIRE etc.
> Clang CodeGen of the builtin internally maps C ABI ordering to llvm atomic 
> ordering.
What language, implemented in clang, do you have in mind that reusing the 
existing __ATOMIC_* macros would be incorrect for?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-06 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam marked 5 inline comments as done.
saiislam added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:13630
+
+  // Map C11/C++11 memory ordering to LLVM memory ordering
+  switch (static_cast(ord)) {

sameerds wrote:
> There should no mention of any high-level language here. The correct enum to 
> validate against is llvm::AtomicOrdering from llvm/Support/AtomicOrdering.h, 
> and not the C ABI or any other language ABI.
Even though this builtin is supposed to be language-independent, here intention 
was to provide interface in terms of well known standard C11/C++11 enums for 
memory order (__ATOMIC_ACQUIRE, etc.), so that user of the builtin don't have 
to remember and modify their code. The builtin internally maps it as per the 
expectation of fence instruction.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:13651
+  llvm::getConstantStringInfo(Scope, scp);
+  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+

sameerds wrote:
> This seems to be creating a new ID for any arbitrary string passed as sync 
> scope. This should be validated against LLVMContext::getSyncScopeNames(). 
As the FE is not aware about specific target and implementation of sync scope 
for that target, getSyncScopeNames() here returns llvm'sdefault sync scopes, 
which only supports singlethreaded and system as valid scopes. Validity 
checking of memory scope string is being intentionally left for the later 
stages which deal with the generated IR.



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

sameerds wrote:
> Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory models. They 
> should not be used with the new builtin because this new builtin does not 
> follow any specific language model. For user convenience, the right thing to 
> do is to introduce new tokens in the Clang preprocessor, similar to the 
> `__ATOMIC_*` tokens. The convenient shortcut is to just tell the user to 
> supply numerical values by looking at the LLVM source code.
> 
> From llvm/Support/AtomicOrdering.h, note how the numerical value for 
> `__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
> SequentiallyConsistent ordering is 7. The numerical value 5 refers to the 
> LLVM ordering "release". So, if the implementation were correct, this line 
> should result in the following unexpected LLVM IR:
>   fence syncscope("workgroup") release
As you pointed out, the range of acquire to sequentiallly consistent memory 
orders for llvm::AtomicOrdering is [4, 7], while for llvm::AtomicOrderingCABI 
is [2, 5]. Enums of C ABI was taken to ensure easy of use for the users who are 
familiar with C/C++ standard memory model. It allows them to use macros like 
__ATOMIC_ACQUIRE etc.
Clang CodeGen of the builtin internally maps C ABI ordering to llvm atomic 
ordering.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-05 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds requested changes to this revision.
sameerds added inline comments.
This revision now requires changes to proceed.



Comment at: clang/include/clang/Basic/Builtins.def:1583
+// Second argument : target specific sync scope string
+BUILTIN(__builtin_memory_fence, "vUicC*", "n")
+

This should be moved to be near line 786,  where atomic builtins are listed 
under the comment "// GCC does not support these, they are a Clang extension."



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:13630
+
+  // Map C11/C++11 memory ordering to LLVM memory ordering
+  switch (static_cast(ord)) {

There should no mention of any high-level language here. The correct enum to 
validate against is llvm::AtomicOrdering from llvm/Support/AtomicOrdering.h, 
and not the C ABI or any other language ABI.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:13651
+  llvm::getConstantStringInfo(Scope, scp);
+  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+

This seems to be creating a new ID for any arbitrary string passed as sync 
scope. This should be validated against LLVMContext::getSyncScopeNames(). 



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:5
+
+void test_memory_fence_success() {
+// CHECK-LABEL: test_memory_fence_success

There should be a line that tries to do:
  __builtin_memory_fence(__ATOMIC_SEQ_CST, "foobar");



Comment at: clang/test/CodeGenHIP/builtin_memory_fence.cpp:9
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  

Orderings like `__ATOMIC_SEQ_CST` are defined for C/C++ memory models. They 
should not be used with the new builtin because this new builtin does not 
follow any specific language model. For user convenience, the right thing to do 
is to introduce new tokens in the Clang preprocessor, similar to the 
`__ATOMIC_*` tokens. The convenient shortcut is to just tell the user to supply 
numerical values by looking at the LLVM source code.

From llvm/Support/AtomicOrdering.h, note how the numerical value for 
`__ATOMIC_SEQ_CST` is 5, but the numerical value for the LLVM 
SequentiallyConsistent ordering is 7. The numerical value 5 refers to the LLVM 
ordering "release". So, if the implementation were correct, this line should 
result in the following unexpected LLVM IR:
  fence syncscope("workgroup") release


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-05 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam updated this revision to Diff 255173.
saiislam added a comment.

Removed OpenCL specific dependencies

Now it takes target-specific sync scope as an string.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

Files:
  clang/include/clang/Basic/Builtins.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenHIP/builtin_memory_fence.cpp
  clang/test/Sema/builtins.c

Index: clang/test/Sema/builtins.c
===
--- clang/test/Sema/builtins.c
+++ clang/test/Sema/builtins.c
@@ -320,3 +320,15 @@
   // expected-error@+1 {{use of unknown builtin '__builtin_is_constant_evaluated'}}
   return __builtin_is_constant_evaluated();
 }
+
+void test_memory_fence_errors() {
+  __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup");  // expected-warning {{memory order argument to atomic operation is invalid}}
+
+  __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup");  // expected-warning {{memory order argument to atomic operation is invalid}}
+
+  __builtin_memory_fence(4);  // expected-error {{too few arguments to function call, expected 2}}
+
+  __builtin_memory_fence(4, 4, 4);  // expected-error {{too many arguments to function call, expected 2}}
+
+  __builtin_memory_fence(3.14, "");  // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+}
Index: clang/test/CodeGenHIP/builtin_memory_fence.cpp
===
--- /dev/null
+++ clang/test/CodeGenHIP/builtin_memory_fence.cpp
@@ -0,0 +1,22 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \
+// RUN:   -triple=amdgcn-amd-amdhsa  | opt -instnamer -S | FileCheck %s
+
+void test_memory_fence_success() {
+// CHECK-LABEL: test_memory_fence_success
+
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST,  "workgroup");
+  
+   // CHECK: fence syncscope("agent") acquire
+  __builtin_memory_fence(__ATOMIC_ACQUIRE, "agent");
+
+  // CHECK: fence seq_cst
+  __builtin_memory_fence(__ATOMIC_SEQ_CST, "");
+
+  // CHECK: fence syncscope("agent") acq_rel
+  __builtin_memory_fence(4, "agent");
+
+// CHECK: fence syncscope("workgroup") release
+  __builtin_memory_fence(3, "workgroup");
+}
\ No newline at end of file
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1855,7 +1855,7 @@
   return ExprError();
 break;
   case Builtin::BI__builtin_frame_address:
-  case Builtin::BI__builtin_return_address:
+  case Builtin::BI__builtin_return_address: {
 if (SemaBuiltinConstantArgRange(TheCall, 0, 0, 0x))
   return ExprError();
 
@@ -1869,6 +1869,30 @@
   ? "__builtin_return_address"
   : "__builtin_frame_address")
   << TheCall->getSourceRange();
+  }
+break;
+
+  case Builtin::BI__builtin_memory_fence: {
+ExprResult Arg = TheCall->getArg(0);
+auto ArgExpr = Arg.get();
+Expr::EvalResult ArgResult;
+
+if(!ArgExpr->EvaluateAsInt(ArgResult, Context)) {
+  Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int) <<
+   ArgExpr->getType();
+  return ExprError();
+}
+int ord = ArgResult.Val.getInt().getZExtValue();
+
+// Check valididty of memory ordering as per C11 / C++11's memody model.
+if (ord < static_cast(llvm::AtomicOrderingCABI::acquire) ||
+  ord > static_cast(llvm::AtomicOrderingCABI::seq_cst)) {
+  Diag(ArgExpr->getBeginLoc(),
+  diag::warn_atomic_op_has_invalid_memory_order)
+  << ArgExpr->getSourceRange();
+  return ExprError();
+}
+}
 break;
   }
 
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -28,6 +28,7 @@
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
@@ -13616,6 +13617,43 @@
 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
 return Builder.CreateCall(F, { Src0, Src1, Src2 });
   }
+
+  case Builtin::BI__builtin_memory_fence: {
+llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
+llvm::SyncScope::ID SSID;
+Value *Order = EmitScalarExpr(E->getArg(0));
+Value *Scope = EmitScalarExpr(E->getArg(1));
+
+if ( isa(Order)) {
+  int ord = cast(Order)->getZExtValue();
+
+  // Map C11/C++11 memory ordering to LLVM memory ordering
+  switch (static_cast(ord)) {
+  case llvm::AtomicOrderingCABI::acquire:
+AO =

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-04-02 Thread Brian Sumner via Phabricator via cfe-commits

b-sumner added a comment.

Please go ahead and update to a string for the scope.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-30 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added a comment.

Can this be revived? Changing the enum to a string still sounds good to me


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-16 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds added a comment.

In D75917#1925700 , @JonChesterfield 
wrote:

>

> I think I follow. The syncscope takes a string, therefore the builtin that 
> maps onto fence should also take a string for that parameter? That's fine by 
> me. Will help if a new non-opencl syncscope is introduced later.

Right. To be precise, it is a target-specific string, and should not be 
processed as if it was an OpenCL scope. The builtin should allow anything that 
the IR fence would allow in a .ll file created for the specified target.

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-16 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added a comment.

In D75917#1925617 , @sameerds wrote:

> Is it really? The scope argument of the IR fence is a target-specific string:
>  http://llvm.org/docs/LangRef.html#syncscope
>
> The change that I see here is assuming a numerical argument, and also 
> assuming that the numbers used must conform to the OpenCL enum. That would 
> certainly make the builtin quite different from the IR fence.


I think I follow. The syncscope takes a string, therefore the builtin that maps 
onto fence should also take a string for that parameter? That's fine by me. 
Will help if a new non-opencl syncscope is introduced later.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-16 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds added a comment.

In D75917#1917296 , @JonChesterfield 
wrote:

> In D75917#1916972 , @sameerds wrote:
>
> > Well, there is a problem: The LangRef says that scopes are target-defined. 
> > This change says that scopes are defined by the high-level language and 
> > further assumes that OpenCL scopes make sense in all languages. Besides 
> > conflicting with the LangRef, this not seem to work with C++, which has no 
> > scopes and nor with CUDA or HIP, whose scopes are not represented in any 
> > AtomicScopeModel.
>
>
> I don't follow. IR has a fence instruction. This builtin maps directly to it, 
> passing whatever integer arguments were given to the intrinsic along 
> unchanged. It's exactly as valid, or invalid, as said fence instruction.


Is it really? The scope argument of the IR fence is a target-specific string:
http://llvm.org/docs/LangRef.html#syncscope

The change that I see here is assuming a numerical argument, and also assuming 
that the numbers used must conform to the OpenCL enum. That would certainly 
make the builtin quite different from the IR fence.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-11 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added a comment.

In D75917#1916972 , @sameerds wrote:

> Well, there is a problem: The LangRef says that scopes are target-defined. 
> This change says that scopes are defined by the high-level language and 
> further assumes that OpenCL scopes make sense in all languages. Besides 
> conflicting with the LangRef, this not seem to work with C++, which has no 
> scopes and nor with CUDA or HIP, whose scopes are not represented in any 
> AtomicScopeModel.

I don't follow. IR has a fence instruction. This builtin maps directly to it, 
passing whatever interfer arguments were given to the intrinsic along 
unchanged. It's exactly as valid, or invalid, as said fence instruction.

Are you objecting to passing enums in the test cases instead of raw integers?

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-11 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds added a comment.

In D75917#1916664 , @JonChesterfield 
wrote:

> In D75917#1916160 , @sameerds wrote:
>
> > how this builtin fits in with the overall scheme of language-specific and 
> > target-specific details of an atomic operation. For example, is this meant 
> > only for OpenCL? Does it work with CUDA? Or HIP? What is the behaviour for 
> > scope in C++?
>
>
> Identical to the fence instruction. Which is assumed well thought through 
> already, given it's an IR instruction.
>
> As far as I can tell, fence composes sensibly with other IR then generates 
> the right thing at the back end. So it looks fit for purpose, just not 
> currently available from clang.

Well, there is a problem: The LangRef says that scopes are target-defined. This 
change says that scopes are defined by the high-level language and further 
assumes that OpenCL scopes make sense in all languages. Besides conflicting 
with the LangRef, this not seem to work with C++, which has no scopes and nor 
with CUDA or HIP, whose scopes are not represented in any AtomicScopeModel.

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-11 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added a comment.

In D75917#1916160 , @sameerds wrote:

> how this builtin fits in with the overall scheme of language-specific and 
> target-specific details of an atomic operation. For example, is this meant 
> only for OpenCL? Does it work with CUDA? Or HIP? What is the behaviour for 
> scope in C++?


Identical to the fence instruction. Which is assumed well thought through 
already, given it's an IR instruction.

As far as I can tell, fence composes sensibly with other IR then generates the 
right thing at the back end. So it looks fit for purpose, just not currently 
available from clang.




Comment at: clang/lib/CodeGen/CGBuiltin.cpp:3707
+Value *Scope = EmitScalarExpr(E->getArg(1));
+auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::OpenCL);
+

sameerds wrote:
> The proposed builtin does not claim to be an OpenCL builtin, so it's probably 
> not correct to simply assume the OpenCL model. Should the model be chosen 
> based on the source language specified?
The only values for AtomicScopeModelKind are none and OpenCL.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-10 Thread Sameer Sahasrabuddhe via Phabricator via cfe-commits

sameerds added a comment.

The commit summary needs improvement. The syntax is not really necessary there, 
but instead this needs a better explanation of how this builtin fits in with 
the overall scheme of language-specific and target-specific details of an 
atomic operation. For example, is this meant only for OpenCL? Does it work with 
CUDA? Or HIP? What is the behaviour for scope in C++?




Comment at: clang/include/clang/Basic/DiagnosticSemaKinds.td:7860-7863
+def err_memory_fence_has_invalid_memory_order : Error<
+  "memory order argument to fence operation is invalid">;
+def err_memory_fence_has_invalid_synch_scope : Error<
+  "synchronization scope argument to fence operation is invalid">;

Just above this addition, atomic op seems to emit a warning for invalid memory 
order. Should that be the case with fence too?



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:3707
+Value *Scope = EmitScalarExpr(E->getArg(1));
+auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::OpenCL);
+

The proposed builtin does not claim to be an OpenCL builtin, so it's probably 
not correct to simply assume the OpenCL model. Should the model be chosen based 
on the source language specified?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-10 Thread Anastasia Stulova via Phabricator via cfe-commits

Anastasia added inline comments.



Comment at: clang/lib/Sema/SemaChecking.cpp:1880
+// Check if Order is an unsigned
+if (!Ty->isIntegerType()) {
+  Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_uint) << Ty;

JonChesterfield wrote:
> isIntegerType will return true for signed integers as well as unsigned. It 
> seems reasonable to call this with a signed integer type (e.g. '2'), so 
> perhaps the references to unsigned should be dropped from the code and error 
> message
> 
I think we should accept implicit type conversion rules from the language...


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-10 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added a subscriber: jdoerfert.
JonChesterfield added a comment.

@jdoerfert this is one of the two intrinsics needed to drop the .ll source from 
the amdgcn deviceRTL. The other is atomic_inc.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-10 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/include/clang/Basic/Builtins.def:1516
 
+// Builtin to expose llvm fence instruction
+BUILTIN(__builtin_memory_fence, "vUiUi", "t")

`BUILTIN(__builtin_memory_fence, "vii", "n")`?

The other fence intrinsics (e.g. __c11_atomic_thread_fence) take signed 
integers and rely on the built in type checking, which seems reasonable here too


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-10 Thread Jon Chesterfield via Phabricator via cfe-commits

JonChesterfield added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:3713
+  switch (ord) {
+  case 0:  // memory_order_relaxed
+  default: // invalid order

Interesting, fence can't be relaxed

> ‘fence’ instructions take an ordering argument which defines what 
> synchronizes-with edges they add. They can only be given acquire, release, 
> acq_rel, and seq_cst orderings.





Comment at: clang/lib/Sema/SemaChecking.cpp:1880
+// Check if Order is an unsigned
+if (!Ty->isIntegerType()) {
+  Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_uint) << Ty;

isIntegerType will return true for signed integers as well as unsigned. It 
seems reasonable to call this with a signed integer type (e.g. '2'), so perhaps 
the references to unsigned should be dropped from the code and error message




Comment at: clang/lib/Sema/SemaChecking.cpp:1889
+
+// Check if Order is one of the valid types
+if (!llvm::isValidAtomicOrderingCABI(ord)) {

This should reject 'relaxed' - I think that's currently accepted by sema then 
silently thrown away by codegen



Comment at: clang/test/CodeGenOpenCL/atomic-ops.cl:291
 
+void test_memory_fence() {
+  // CHECK-LABEL: @test_memory_fence

I'm hoping this intrinsic will be usable from C or C++, as well as from OpenCL 
- please could you add a non-opencl codegen test.

It doesn't need to check all the cases again, just enough to show that the 
intrinsic and arguments are available (they're spelled like `__ATOMIC_SEQ_CST`, 
`__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES` outside of opencl)



Comment at: clang/test/SemaOpenCL/atomic-ops.cl:198
+
+void memory_fence_errors() {
+  __builtin_memory_fence(memory_order_seq_cst + 1, memory_scope_work_group); 
// expected-error {{memory order argument to fence operation is invalid}}

A happy case too please, e.g. to show that it accepts a couple of integers. 
Looks like ` __builtin_memory_fence(2, 2);` but without an expected-error 
comment


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75917/new/

https://reviews.llvm.org/D75917



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75917: Expose llvm fence instruction as clang intrinsic

2020-03-10 Thread Saiyedul Islam via Phabricator via cfe-commits

saiislam created this revision.
saiislam added reviewers: JonChesterfield, sameerds, yaxunl, gregrodgers, 
b-sumner.
Herald added subscribers: cfe-commits, jfb.
Herald added a project: clang.

"__buitlin_memory_fence(, )" in clang gets
mapped to "fence [syncscope("")] ; yields void"
in IR.


Repository:
  rC Clang

https://reviews.llvm.org/D75917

Files:
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenOpenCL/atomic-ops.cl
  clang/test/SemaOpenCL/atomic-ops.cl

Index: clang/test/SemaOpenCL/atomic-ops.cl
===
--- clang/test/SemaOpenCL/atomic-ops.cl
+++ clang/test/SemaOpenCL/atomic-ops.cl
@@ -194,3 +194,21 @@
   // The 'expected' pointer shouldn't be NULL.
   (void)__opencl_atomic_compare_exchange_strong(Ap, (void *)0, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group); // expected-warning {{null passed to a callee that requires a non-null argument}}
 }
+
+void memory_fence_errors() {
+  __builtin_memory_fence(memory_order_seq_cst + 1, memory_scope_work_group); // expected-error {{memory order argument to fence operation is invalid}}
+
+  __builtin_memory_fence(memory_order_seq_cst, memory_scope_sub_group + 1); // expected-error {{synchronization scope argument to fence operation is invalid}}
+
+  __builtin_memory_fence(memory_scope_work_group); // expected-error {{too few arguments to function call, expected 2}}
+
+  __builtin_memory_fence(2, 2, 2); // expected-error {{too many arguments to function call, expected 2}}
+
+  __builtin_memory_fence("string", memory_scope_work_group); // expected-error {{used type '__constant char [7]' where unsigned is required}}
+
+  __builtin_memory_fence(3.14, memory_scope_work_group); // expected-error {{used type 'double' where unsigned is required}}
+
+  __builtin_memory_fence(memory_order_seq_cst, "string"); // expected-error {{used type '__constant char [7]' where unsigned is required}}
+
+  __builtin_memory_fence(memory_order_seq_cst, 3.14); // expected-error {{used type 'double' where unsigned is required}}
+}
Index: clang/test/CodeGenOpenCL/atomic-ops.cl
===
--- clang/test/CodeGenOpenCL/atomic-ops.cl
+++ clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -288,4 +288,44 @@
   return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
 }
 
+void test_memory_fence() {
+  // CHECK-LABEL: @test_memory_fence
+
+  // CHECK: fence syncscope("workgroup-one-as") acquire
+  __builtin_memory_fence(memory_order_acquire, memory_scope_work_group);
+
+  // CHECK: fence syncscope("agent-one-as") acquire
+  __builtin_memory_fence(memory_order_acquire, memory_scope_device);
+
+  // CHECK: fence syncscope("one-as") acquire
+  __builtin_memory_fence(memory_order_acquire, memory_scope_all_svm_devices);
+
+  // CHECK: fence syncscope("wavefront-one-as") acquire
+  __builtin_memory_fence(memory_order_acquire, memory_scope_sub_group);
+
+  // CHECK: fence syncscope("workgroup-one-as") release
+  __builtin_memory_fence(memory_order_release,  memory_scope_work_group);
+
+  // CHECK: fence syncscope("agent-one-as") release
+  __builtin_memory_fence(memory_order_release, memory_scope_device);
+
+  // CHECK: fence syncscope("one-as") release
+  __builtin_memory_fence(memory_order_release, memory_scope_all_svm_devices);
+
+  // CHECK: fence syncscope("wavefront-one-as") release
+  __builtin_memory_fence(memory_order_release, memory_scope_sub_group);
+
+  // CHECK: fence syncscope("workgroup") seq_cst
+  __builtin_memory_fence(memory_order_seq_cst,  memory_scope_work_group);
+
+  // CHECK: fence syncscope("agent") seq_cst
+  __builtin_memory_fence(memory_order_seq_cst, memory_scope_device);
+
+  // CHECK: fence seq_cst
+  __builtin_memory_fence(memory_order_seq_cst, memory_scope_all_svm_devices);
+
+  // CHECK: fence syncscope("wavefront") seq_cst
+  __builtin_memory_fence(memory_order_seq_cst, memory_scope_sub_group);
+}
+
 #endif
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1865,6 +1865,60 @@
   : "__builtin_frame_address")
   << TheCall->getSourceRange();
 break;
+
+  // Clang builtins to expose llvm fence instruction
+  case Builtin::BI__builtin_memory_fence: {
+if (checkArgCount(*this, TheCall, 2))
+  return true;
+
+// Order should be the first argument
+ExprResult Arg = TheCall->getArg(0);
+auto ArgExpr = Arg.get();
+auto Ty = ArgExpr->getType();
+
+// Check if Order is an unsigned
+if (!Ty->isIntegerType()) {
+  Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_uint) << Ty;
+  return ExprError();
+}
+
+Expr::EvalResult ArgResult;
+ArgExpr->EvaluateAsInt(ArgResult, Context);
+int

44 matches

Mail list logo