[clang] f253bb6 - [WebAssembly] Fix simd bit shift intrinsics codegen

2023-02-16 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2023-02-17T09:17:47+08:00
New Revision: f253bb640d97756d2808bb7c7b2bb31b1090a654

URL: 
https://github.com/llvm/llvm-project/commit/f253bb640d97756d2808bb7c7b2bb31b1090a654
DIFF: 
https://github.com/llvm/llvm-project/commit/f253bb640d97756d2808bb7c7b2bb31b1090a654.diff

LOG: [WebAssembly] Fix simd bit shift intrinsics codegen

According to github.com/WebAssembly/simd/blob/main/proposals/simd/SIMD.md,
the shift count of the bit shift instructions is taken modulo the lane width.
This patch adds that masking to the wasm_simd128.h shift intrinsics.

Fixes PR#60655

Differential Revision: https://reviews.llvm.org/D144169
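
A minimal sketch of the semantics being encoded, assuming a WebAssembly target
compiled with -msimd128 (the function name is illustrative, not part of the
patch):

  #include <wasm_simd128.h>

  v128_t shl_by_nine(v128_t v) {
    // i8x16.shl takes its count modulo the 8-bit lane width, so a count of 9
    // behaves as 9 % 8 == 1; the header now masks the count with 0x7 to match.
    return wasm_i8x16_shl(v, 9);
  }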

Added: 


Modified: 
clang/lib/Headers/wasm_simd128.h
clang/test/Headers/wasm.c

Removed: 




diff  --git a/clang/lib/Headers/wasm_simd128.h 
b/clang/lib/Headers/wasm_simd128.h
index f93de129f9577..a099ab57dc68b 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -961,17 +961,17 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i8x16_popcnt(v128_t __a) {
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i8x16)__a << __b);
+  return (v128_t)((__i8x16)__a << (__b & 0x7));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i8x16)__a >> __b);
+  return (v128_t)((__i8x16)__a >> (__b & 0x7));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__u8x16)__a >> __b);
+  return (v128_t)((__u8x16)__a >> (__b & 0x7));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t __a,
@@ -1047,17 +1047,17 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS 
wasm_i16x8_bitmask(v128_t __a) {
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i16x8)__a << __b);
+  return (v128_t)((__i16x8)__a << (__b & 0xF));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i16x8)__a >> __b);
+  return (v128_t)((__i16x8)__a >> (__b & 0xF));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__u16x8)__a >> __b);
+  return (v128_t)((__u16x8)__a >> (__b & 0xF));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t __a,
@@ -1138,17 +1138,17 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS 
wasm_i32x4_bitmask(v128_t __a) {
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i32x4)__a << __b);
+  return (v128_t)((__i32x4)__a << (__b & 0x1F));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i32x4)__a >> __b);
+  return (v128_t)((__i32x4)__a >> (__b & 0x1F));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__u32x4)__a >> __b);
+  return (v128_t)((__u32x4)__a >> (__b & 0x1F));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_add(v128_t __a,
@@ -1209,17 +1209,17 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS 
wasm_i64x2_bitmask(v128_t __a) {
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i64x2)__a << (int64_t)__b);
+  return (v128_t)((__i64x2)__a << ((int64_t)__b & 0x3F));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__i64x2)__a >> (int64_t)__b);
+  return (v128_t)((__i64x2)__a >> ((int64_t)__b & 0x3F));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t __a,
uint32_t __b) {
-  return (v128_t)((__u64x2)__a >> (int64_t)__b);
+  return (v128_t)((__u64x2)__a >> ((int64_t)__b & 0x3F));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_add(v128_t __a,

diff  --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 42e11e412ea4a..a755499c6c797 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1584,11 +1584,12 @@ v128_t test_i8x16_popcnt(v128_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i32> 

[clang] e073de8 - [WebAssembly] Update wasm.c with update_cc_test_checks.py. NFC

2023-02-16 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2023-02-17T09:17:47+08:00
New Revision: e073de833671159f89d72a1018d335cc042d

URL: 
https://github.com/llvm/llvm-project/commit/e073de833671159f89d72a1018d335cc042d
DIFF: 
https://github.com/llvm/llvm-project/commit/e073de833671159f89d72a1018d335cc042d.diff

LOG: [WebAssembly] Update wasm.c with update_cc_test_checks.py. NFC

Added: 


Modified: 
clang/test/Headers/wasm.c

Removed: 




diff  --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 8913a273aaeb1..42e11e412ea4a 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --force-update
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: webassembly-registered-target, asserts
 
 // FIXME: This should not be using -O2 and implicitly testing the entire IR 
opt pipeline.
@@ -1462,8 +1462,8 @@ v128_t test_f64x2_ge(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_v128_not(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[NEG_I:%.*]] = xor <4 x i32> [[A:%.*]], 
-// CHECK-NEXT:ret <4 x i32> [[NEG_I]]
+// CHECK-NEXT:[[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], 
+// CHECK-NEXT:ret <4 x i32> [[NOT_I]]
 //
 v128_t test_v128_not(v128_t a) {
   return wasm_v128_not(a);
@@ -1498,8 +1498,8 @@ v128_t test_v128_xor(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_v128_andnot(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[NEG_I:%.*]] = xor <4 x i32> [[B:%.*]], 
-// CHECK-NEXT:[[AND_I:%.*]] = and <4 x i32> [[NEG_I]], [[A:%.*]]
+// CHECK-NEXT:[[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], 
+// CHECK-NEXT:[[AND_I:%.*]] = and <4 x i32> [[NOT_I]], [[A:%.*]]
 // CHECK-NEXT:ret <4 x i32> [[AND_I]]
 //
 v128_t test_v128_andnot(v128_t a, v128_t b) {
@@ -1529,9 +1529,9 @@ v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t 
mask) {
 // CHECK-LABEL: @test_i8x16_abs(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT:[[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> 
[[TMP0]], i1 false)
-// CHECK-NEXT:[[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:ret <4 x i32> [[TMP2]]
+// CHECK-NEXT:[[ABS_I:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x 
i8> [[TMP0]], i1 false)
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast <16 x i8> [[ABS_I]] to <4 x i32>
+// CHECK-NEXT:ret <4 x i32> [[TMP1]]
 //
 v128_t test_i8x16_abs(v128_t a) {
   return wasm_i8x16_abs(a);
@@ -1757,9 +1757,9 @@ v128_t test_u8x16_avgr(v128_t a, v128_t b) {
 // CHECK-LABEL: @test_i16x8_abs(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT:[[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> 
[[TMP0]], i1 false)
-// CHECK-NEXT:[[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:ret <4 x i32> [[TMP2]]
+// CHECK-NEXT:[[ABS_I:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x 
i16> [[TMP0]], i1 false)
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[ABS_I]] to <4 x i32>
+// CHECK-NEXT:ret <4 x i32> [[TMP1]]
 //
 v128_t test_i16x8_abs(v128_t a) {
   return wasm_i16x8_abs(a);
@@ -1985,8 +1985,8 @@ v128_t test_u16x8_avgr(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_i32x4_abs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> 
[[A:%.*]], i1 false)
-// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+// CHECK-NEXT:[[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x 
i32> [[A:%.*]], i1 false)
+// CHECK-NEXT:ret <4 x i32> [[ABS_I]]
 //
 v128_t test_i32x4_abs(v128_t a) {
   return wasm_i32x4_abs(a);
@@ -2130,9 +2130,9 @@ v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
 // CHECK-LABEL: @test_i64x2_abs(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT:[[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> 
[[TMP0]], i1 false)
-// CHECK-NEXT:[[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:ret <4 x i32> [[TMP2]]
+// CHECK-NEXT:[[ABS_I:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x 
i64> [[TMP0]], i1 false)
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[ABS_I]] to <4 x i32>
+// CHECK-NEXT:ret <4 x i32> [[TMP1]]
 //
 v128_t test_i64x2_abs(v128_t a) {
   return wasm_i64x2_abs(a);





[clang] 599b2f0 - [AArch64][SVE] Handle svbool_t VLST <-> VLAT/GNUT conversion

2021-07-21 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2021-07-22T13:55:08+08:00
New Revision: 599b2f00370ee79e812d2776f2af57fae36d02e9

URL: 
https://github.com/llvm/llvm-project/commit/599b2f00370ee79e812d2776f2af57fae36d02e9
DIFF: 
https://github.com/llvm/llvm-project/commit/599b2f00370ee79e812d2776f2af57fae36d02e9.diff

LOG: [AArch64][SVE] Handle svbool_t VLST <-> VLAT/GNUT conversion

According to https://godbolt.org/z/q5rME1naY and the ACLE, clang and gcc show
different SVE conversion behaviour. It turns out that llvm does not handle the
width of SVE predicates properly.

This patch 1) checks the SVE predicate width correctly for the svbool_t type,
2) removes the warning on svbool_t VLST <-> VLAT/GNUT conversion, and
3) disables VLST <-> VLAT/GNUT conversion between SVE vectors and predicates,
because their widths differ.

Differential Revision: https://reviews.llvm.org/D106333
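
A minimal sketch of the width difference being handled, assuming an AArch64
target compiled with -msve-vector-bits=512 (the typedef names are illustrative):

  #include <arm_sve.h>

  typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));
  typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(512)));

  // A fixed-length data vector occupies 512 bits, while a fixed-length
  // predicate occupies 512 / 8 = 64 bits, so the two kinds of VLST are not
  // interconvertible.
  static_assert(sizeof(fixed_int32_t) == 64, "data VLST is 512 bits");
  static_assert(sizeof(fixed_bool_t) == 8, "predicate VLST is 512/8 bits");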

Added: 


Modified: 
clang/lib/AST/ASTContext.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Removed: 




diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 46b778d2834a8..e102a3ba508d4 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -8670,6 +8670,14 @@ bool ASTContext::areCompatibleVectorTypes(QualType 
FirstVec,
   return false;
 }
 
+/// getSVETypeSize - Return SVE vector or predicate register size.
+static uint64_t getSVETypeSize(ASTContext &Context, const BuiltinType *Ty) {
+  assert(Ty->isVLSTBuiltinType() && "Invalid SVE Type");
+  return Ty->getKind() == BuiltinType::SveBool
+ ? Context.getLangOpts().ArmSveVectorBits / Context.getCharWidth()
+ : Context.getLangOpts().ArmSveVectorBits;
+}
+
 bool ASTContext::areCompatibleSveTypes(QualType FirstType,
QualType SecondType) {
   assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
@@ -8687,7 +8695,7 @@ bool ASTContext::areCompatibleSveTypes(QualType FirstType,
   return VT->getElementType().getCanonicalType() ==
  FirstType->getSveEltType(*this);
 else if (VT->getVectorKind() == VectorType::GenericVector)
-  return getTypeSize(SecondType) == getLangOpts().ArmSveVectorBits &&
+  return getTypeSize(SecondType) == getSVETypeSize(*this, BT) &&
  hasSameType(VT->getElementType(),
  getBuiltinVectorTypeInfo(BT).ElementType);
   }
@@ -8706,7 +8714,8 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType 
FirstType,
  "Expected SVE builtin type and vector type!");
 
   auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
-if (!FirstType->getAs<BuiltinType>())
+const auto *BT = FirstType->getAs<BuiltinType>();
+if (!BT)
   return false;
 
 const auto *VecTy = SecondType->getAs<VectorType>();
@@ -8716,13 +8725,19 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType 
FirstType,
   const LangOptions::LaxVectorConversionKind LVCKind =
   getLangOpts().getLaxVectorConversions();
 
+  // Can not convert between sve predicates and sve vectors because of
+  // different size.
+  if (BT->getKind() == BuiltinType::SveBool &&
+  VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector)
+return false;
+
   // If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion.
   // "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly
   // converts to VLAT and VLAT implicitly converts to GNUT."
   // ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and
   // predicates.
   if (VecTy->getVectorKind() == VectorType::GenericVector &&
-  getTypeSize(SecondType) != getLangOpts().ArmSveVectorBits)
+  getTypeSize(SecondType) != getSVETypeSize(*this, BT))
 return false;
 
   // If -flax-vector-conversions=all is specified, the types are

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 242c2968da45f..02da39c11d7f9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -12570,15 +12570,13 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
 checkObjCDictionaryLiteral(S, QualType(Target, 0), DictionaryLiteral);
 
   // Strip vector types.
-  if (const auto *SourceVT = dyn_cast<VectorType>(Source)) {
-if (Target->isVLSTBuiltinType()) {
-  auto SourceVectorKind = SourceVT->getVectorKind();
-  if (SourceVectorKind == VectorType::SveFixedLengthDataVector ||
-  SourceVectorKind == VectorType::SveFixedLengthPredicateVector ||
-  (SourceVectorKind == VectorType::GenericVector &&
-   S.Context.getTypeSize(Source) == S.getLangOpts().ArmSveVectorBits))
-

[clang] 3afbf89 - [clang][AArch64][SVE] Handle PRValue under VLAT <-> VLST cast

2021-06-30 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2021-07-01T10:09:47+08:00
New Revision: 3afbf898044aa5839ed75273fa38a897abe9d3d4

URL: 
https://github.com/llvm/llvm-project/commit/3afbf898044aa5839ed75273fa38a897abe9d3d4
DIFF: 
https://github.com/llvm/llvm-project/commit/3afbf898044aa5839ed75273fa38a897abe9d3d4.diff

LOG: [clang][AArch64][SVE] Handle PRValue under VLAT <-> VLST cast

This change fixes a crash where a PRValue cast operand could not be handled
by EmitLValue.

Differential Revision: https://reviews.llvm.org/D105097
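
A hypothetical example of the kind of cast this path handles, assuming an
AArch64 target compiled with -msve-vector-bits=512: the operand of the
VLST -> VLAT conversion is a prvalue (the result of the addition), so it has no
address to hand to EmitLValue; it is now spilled to a temporary alloca instead.

  #include <arm_sve.h>

  typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

  svint32_t add_fixed(fixed_int32_t a, fixed_int32_t b) {
    return a + b; // prvalue VLST implicitly converted to scalable svint32_t
  }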

Added: 


Modified: 
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index d299a1d38e0eb..92015c8efda0a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2095,24 +2095,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
  isa(DstTy)) ||
 (isa(SrcTy) &&
  isa(DstTy))) {
-  if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
-// Call expressions can't have a scalar return unless the return type
-// is a reference type so an lvalue can't be emitted. Create a temp
-// alloca to store the call, bitcast the address then load.
-QualType RetTy = CE->getCallReturnType(CGF.getContext());
-Address Addr =
-CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-call-rvalue");
-LValue LV = CGF.MakeAddrLValue(Addr, RetTy);
-CGF.EmitStoreOfScalar(Src, LV);
-Addr = Builder.CreateElementBitCast(Addr, 
CGF.ConvertTypeForMem(DestTy),
-"castFixedSve");
-LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
-DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
-return EmitLoadOfLValue(DestLV, CE->getExprLoc());
-  }
-
-  Address Addr = EmitLValue(E).getAddress(CGF);
-  Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy));
+  Address Addr = CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-value");
+  LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
+  CGF.EmitStoreOfScalar(Src, LV);
+  Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy),
+  "castFixedSve");
   LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
   DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
   return EmitLoadOfLValue(DestLV, CE->getExprLoc());

diff  --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c 
b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
index 73ac3f49cf3bd..278cc930610bd 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -30,21 +30,21 @@ DEFINE_STRUCT(bool)
 // CHECK-128-LABEL: @read_int64(
 // CHECK-128-NEXT:  entry:
 // CHECK-128-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], 
align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], 
align 16, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-128-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP0]], i64 0)
 // CHECK-128-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 // CHECK-256-LABEL: @read_int64(
 // CHECK-256-NEXT:  entry:
 // CHECK-256-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:[[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[ARRAYIDX]], 
align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-256-NEXT:[[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[ARRAYIDX]], 
align 16, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-256-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP0]], i64 0)
 // CHECK-256-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 // CHECK-512-LABEL: @read_int64(
 // CHECK-512-NEXT:  entry:
 // CHECK-512-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[ARRAYIDX]], 
align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[ARRAYIDX]], 
align 16, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-512-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[TMP0]], i64 0)
 // CHECK-512-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
@@ -56,21 +56,21 @@ svint64_t 

[clang] ae54339 - [AArch64][SVEIntrinsicOpts] Convert cntb/h/w/d to vscale intrinsic or constant.

2021-06-30 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2021-07-01T10:09:47+08:00
New Revision: ae5433945f915c2f0e39f8a026166cbdc8b0469c

URL: 
https://github.com/llvm/llvm-project/commit/ae5433945f915c2f0e39f8a026166cbdc8b0469c
DIFF: 
https://github.com/llvm/llvm-project/commit/ae5433945f915c2f0e39f8a026166cbdc8b0469c.diff

LOG: [AArch64][SVEIntrinsicOpts] Convert cntb/h/w/d to vscale intrinsic or constant.

As the title says: convert calls to the SVE cntb/cnth/cntw/cntd counting
intrinsics into vscale-based expressions or constants.

Differential Revision: https://reviews.llvm.org/D104852
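
A minimal sketch of the folding, assuming an AArch64 target with SVE enabled
(the function names are illustrative):

  #include <arm_sve.h>

  uint64_t bytes_per_vector() {
    return svcntb();           // now lowered to llvm.vscale.i64() shifted left by 4
  }

  uint64_t four_byte_lanes() {
    return svcntb_pat(SV_VL4); // now folded to the constant 4
  }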

Added: 

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-counting-elems.ll

Modified: 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 




diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
index 22fb8a2e39f1a..9e4326e312c54 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
@@ -7,8 +7,9 @@
 uint64_t test_svcntb()
 {
   // CHECK-LABEL: test_svcntb
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 31)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.vscale.i64()
+  // CHECK-NEXT: %[[RET:.*]] = shl i64 %[[INTRINSIC]], 4
+  // CHECK: ret i64 %[[RET]]
   return svcntb();
 }
 
@@ -23,72 +24,63 @@ uint64_t test_svcntb_pat()
 uint64_t test_svcntb_pat_1()
 {
   // CHECK-LABEL: test_svcntb_pat_1
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 1)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 1
   return svcntb_pat(SV_VL1);
 }
 
 uint64_t test_svcntb_pat_2()
 {
   // CHECK-LABEL: test_svcntb_pat_2
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 2)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 2
   return svcntb_pat(SV_VL2);
 }
 
 uint64_t test_svcntb_pat_3()
 {
   // CHECK-LABEL: test_svcntb_pat_3
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 3)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 3
   return svcntb_pat(SV_VL3);
 }
 
 uint64_t test_svcntb_pat_4()
 {
   // CHECK-LABEL: test_svcntb_pat_4
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 4)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 4
   return svcntb_pat(SV_VL4);
 }
 
 uint64_t test_svcntb_pat_5()
 {
   // CHECK-LABEL: test_svcntb_pat_5
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 5)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 5
   return svcntb_pat(SV_VL5);
 }
 
 uint64_t test_svcntb_pat_6()
 {
   // CHECK-LABEL: test_svcntb_pat_6
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 6)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 6
   return svcntb_pat(SV_VL6);
 }
 
 uint64_t test_svcntb_pat_7()
 {
   // CHECK-LABEL: test_svcntb_pat_7
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 7)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 7
   return svcntb_pat(SV_VL7);
 }
 
 uint64_t test_svcntb_pat_8()
 {
   // CHECK-LABEL: test_svcntb_pat_8
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 8)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 8
   return svcntb_pat(SV_VL8);
 }
 
 uint64_t test_svcntb_pat_9()
 {
   // CHECK-LABEL: test_svcntb_pat_9
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 9)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: ret i64 16
   return svcntb_pat(SV_VL16);
 }
 
@@ -143,7 +135,8 @@ uint64_t test_svcntb_pat_15()
 uint64_t test_svcntb_pat_16()
 {
   // CHECK-LABEL: test_svcntb_pat_16
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntb(i32 31)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.vscale.i64()
+  // CHECK-NEXT: %[[RET:.*]] = shl i64 %[[INTRINSIC]], 4
+  // CHECK: ret i64 %[[RET]]
   return svcntb_pat(SV_ALL);
 }

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
index 86108629d94ab..9880968bae9b0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
@@ -7,8 +7,9 @@
 uint64_t test_svcntd()
 {
   // CHECK-LABEL: test_svcntd
-  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntd(i32 31)
-  // CHECK: ret i64 %[[INTRINSIC]]
+  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.vscale.i64()
+  // CHECK-NEXT: %[[RET:.*]] = shl i64 %[[INTRINSIC]], 1
+  // CHECK: ret i64 %[[RET]]
   return svcntd();
 }
 
@@ -23,16 +24,14 @@ uint64_t test_svcntd_pat()
 uint64_t test_svcntd_pat_1()
 {
   // CHECK-LABEL: test_svcntd_pat_1
-  // 

[clang] 8849831 - [Coroutines] Warning if return type of coroutine_handle::address is not void*

2020-07-05 Thread Jun Ma via cfe-commits

Author: Chuanqi Xu
Date: 2020-07-06T13:46:01+08:00
New Revision: 8849831d55a203eca1069a0e11877ab7e7e0ac57

URL: 
https://github.com/llvm/llvm-project/commit/8849831d55a203eca1069a0e11877ab7e7e0ac57
DIFF: 
https://github.com/llvm/llvm-project/commit/8849831d55a203eca1069a0e11877ab7e7e0ac57.diff

LOG: [Coroutines] Warning if return type of coroutine_handle::address is not 
void*

A user can provide a version of coroutine_handle::address() whose return type
is not void*, by specializing coroutine_handle<> for some promise_type.

In that case, the code may break compatibility with existing async C APIs that
accept a void* data parameter which is then passed back to the user-provided
callback.

Patch by ChuanqiXu

Differential Revision: https://reviews.llvm.org/D82442

Added: 
clang/test/SemaCXX/coroutine_handle-addres-return-type.cpp

Modified: 
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Sema/SemaCoroutine.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f0921337f312..5b94aa8c4325 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10527,6 +10527,9 @@ def err_await_suspend_invalid_return_type : Error<
 def note_await_ready_no_bool_conversion : Note<
   "return type of 'await_ready' is required to be contextually convertible to 
'bool'"
 >;
+def warn_coroutine_handle_address_invalid_return_type : Warning <
+  "return type of 'coroutine_handle<>::address should be 'void*' (have %0) in 
order to get capability with existing async C API.">,
+  InGroup;
 def err_coroutine_promise_final_suspend_requires_nothrow : Error<
   "the expression 'co_await __promise.final_suspend()' is required to be 
non-throwing"
 >;

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 6b2538a677e5..b1841d646643 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1751,6 +1751,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   ~InlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; }
 };
   };
+
 private:
   /// CXXThisDecl - When generating code for a C++ member function,
   /// this will hold the implicit 'this' declaration.

diff  --git a/clang/lib/Sema/SemaCoroutine.cpp 
b/clang/lib/Sema/SemaCoroutine.cpp
index 70b8fd282056..992cccac6405 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -391,7 +391,13 @@ static Expr *maybeTailCall(Sema &S, QualType RetType, Expr *E,
 return nullptr;
 
   Expr *JustAddress = AddressExpr.get();
-  // FIXME: Check that the type of AddressExpr is void*
+
+  // Check that the type of AddressExpr is void*
+  if (!JustAddress->getType().getTypePtr()->isVoidPointerType())
+S.Diag(cast<CallExpr>(JustAddress)->getCalleeDecl()->getLocation(),
+   diag::warn_coroutine_handle_address_invalid_return_type)
+<< JustAddress->getType();
+
   return buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_resume,
   JustAddress);
 }

diff  --git a/clang/test/SemaCXX/coroutine_handle-addres-return-type.cpp 
b/clang/test/SemaCXX/coroutine_handle-addres-return-type.cpp
new file mode 100644
index ..a95138365234
--- /dev/null
+++ b/clang/test/SemaCXX/coroutine_handle-addres-return-type.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -verify %s -stdlib=libc++ -std=c++1z -fcoroutines-ts 
-fsyntax-only
+
+namespace std::experimental {
+template 
+struct coroutine_handle;
+
+template <>
+struct coroutine_handle {
+  coroutine_handle() = default;
+  static coroutine_handle from_address(void *) noexcept;
+  void *address() const;
+};
+
+template 
+struct coroutine_handle : public coroutine_handle<> {
+};
+
+template 
+struct void_t_imp {
+  using type = void;
+};
+template 
+using void_t = typename void_t_imp::type;
+
+template 
+struct traits_sfinae_base {};
+
+template 
+struct traits_sfinae_base> {
+  using promise_type = typename T::promise_type;
+};
+
+template 
+struct coroutine_traits : public traits_sfinae_base {};
+} // namespace std::experimental
+
+struct suspend_never {
+  bool await_ready() noexcept;
+  void await_suspend(std::experimental::coroutine_handle<>) noexcept;
+  void await_resume() noexcept;
+};
+
+struct task {
+  struct promise_type {
+auto initial_suspend() { return suspend_never{}; }
+auto final_suspend() noexcept { return suspend_never{}; }
+auto get_return_object() { return task{}; }
+static void unhandled_exception() {}
+void return_void() {}
+  };
+};
+
+namespace std::experimental {
+template <>
+struct coroutine_handle : public coroutine_handle<> {
+  coroutine_handle *address() const; // expected-warning 
{{return type of 'coroutine_handle<>::address should be 'void*'}}

[clang] 4a17769 - [CodeGen][TLS] Set TLS Model for __tls_guard as well.

2020-06-16 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2020-06-17T08:31:13+08:00
New Revision: 4a1776979fd8e9473e433d7ec6f2bbf4bf9523ff

URL: 
https://github.com/llvm/llvm-project/commit/4a1776979fd8e9473e433d7ec6f2bbf4bf9523ff
DIFF: 
https://github.com/llvm/llvm-project/commit/4a1776979fd8e9473e433d7ec6f2bbf4bf9523ff.diff

LOG: [CodeGen][TLS] Set TLS Model for __tls_guard as well.

Differential Revision: https://reviews.llvm.org/D81543
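
A minimal sketch of where __tls_guard comes from (the struct is illustrative):
a thread_local with a dynamic initializer makes the Itanium C++ ABI emit an
internal __tls_guard byte, and with this patch that guard now also honours the
default TLS model from -ftls-model (for example initial-exec) instead of always
using general-dynamic.

  struct S {
    S(); // non-constant initialization forces a guarded TLS init
  };
  thread_local S s; // emits @__tls_guard alongside @s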

Added: 
clang/test/CodeGen/tls-model.cpp

Modified: 
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
clang/lib/CodeGen/ItaniumCXXABI.cpp
clang/lib/CodeGen/MicrosoftCXXABI.cpp
clang/test/CodeGenCXX/ms-thread_local.cpp

Removed: 
clang/test/CodeGen/tls-model.c



diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 887df8a97f34..79bc51e43f7a 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -971,9 +971,9 @@ static llvm::GlobalVariable::ThreadLocalMode 
GetLLVMTLSModel(StringRef S) {
   .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel);
 }
 
-static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(
-CodeGenOptions::TLSModel M) {
-  switch (M) {
+llvm::GlobalVariable::ThreadLocalMode
+CodeGenModule::GetDefaultLLVMTLSModel() const {
+  switch (CodeGenOpts.getDefaultTLSModel()) {
   case CodeGenOptions::GeneralDynamicTLSModel:
 return llvm::GlobalVariable::GeneralDynamicTLSModel;
   case CodeGenOptions::LocalDynamicTLSModel:
@@ -990,7 +990,7 @@ void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const {
   assert(D.getTLSKind() && "setting TLS mode on non-TLS var!");
 
   llvm::GlobalValue::ThreadLocalMode TLM;
-  TLM = GetLLVMTLSModel(CodeGenOpts.getDefaultTLSModel());
+  TLM = GetDefaultLLVMTLSModel();
 
   // Override the TLS model if it is explicitly specified.
   if (const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>()) {

diff  --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index 17d42c50728e..d17d652b32f6 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -790,6 +790,9 @@ class CodeGenModule : public CodeGenTypeCache {
   /// variable declaration D.
   void setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const;
 
+  /// Get LLVM TLS mode from CodeGenOptions.
+  llvm::GlobalVariable::ThreadLocalMode GetDefaultLLVMTLSModel() const;
+
   static llvm::GlobalValue::VisibilityTypes GetLLVMVisibility(Visibility V) {
 switch (V) {
 case DefaultVisibility:   return llvm::GlobalValue::DefaultVisibility;

diff  --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp 
b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index a4f2780c..f79de5db2d39 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2687,6 +2687,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
 llvm::GlobalVariable::InternalLinkage,
 llvm::ConstantInt::get(CGM.Int8Ty, 0), "__tls_guard");
 Guard->setThreadLocal(true);
+Guard->setThreadLocalMode(CGM.GetDefaultLLVMTLSModel());
 
 CharUnits GuardAlign = CharUnits::One();
 Guard->setAlignment(GuardAlign.getAsAlign());

diff  --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp 
b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index a9d87f135b65..6f6295a17158 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -2523,7 +2523,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
   GuardVar->setComdat(
   CGM.getModule().getOrInsertComdat(GuardVar->getName()));
 if (D.getTLSKind())
-  GuardVar->setThreadLocal(true);
+  CGM.setTLSMode(GuardVar, D);
 if (GI && !HasPerVariableGuard)
   GI->Guard = GuardVar;
   }

diff  --git a/clang/test/CodeGen/tls-model.c b/clang/test/CodeGen/tls-model.cpp
similarity index 57%
rename from clang/test/CodeGen/tls-model.c
rename to clang/test/CodeGen/tls-model.cpp
index 313c3b1dfd41..872566fcf0be 100644
--- a/clang/test/CodeGen/tls-model.c
+++ b/clang/test/CodeGen/tls-model.cpp
@@ -16,29 +16,52 @@ int f() {
 }
 int __thread __attribute__((tls_model("initial-exec"))) z;
 
+struct S {
+  S();
+  ~S();
+};
+struct T {
+  ~T();
+};
+
+struct S thread_local s1;
+struct T thread_local t1;
+
 // Note that unlike normal C uninitialized global variables,
 // uninitialized TLS variables do NOT have COMMON linkage.
 
 // CHECK-GD: @z1 = global i32 0
-// CHECK-GD: @f.y = internal thread_local global i32 0
 // CHECK-GD: @z2 = global i32 0
 // CHECK-GD: @x = thread_local global i32 0
+// CHECK-GD: @_ZZ1fvE1y = internal thread_local global i32 0
 // CHECK-GD: @z = thread_local(initialexec) global i32 0
+// CHECK-GD: @s1 = thread_local global %struct.S zeroinitializer
+// CHECK-GD: @t1 = thread_local global %struct.T zeroinitializer
+// CHECK-GD: @__tls_guard = internal thread_local global i8 0
 
 // CHECK-LD: @z1 = global i32 0

[clang] a0de333 - [clang] Implement VectorType logic not operator.

2020-06-07 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2020-06-08T08:41:01+08:00
New Revision: a0de3335edcf19305dad592d21ebe402825184f6

URL: 
https://github.com/llvm/llvm-project/commit/a0de3335edcf19305dad592d21ebe402825184f6
DIFF: 
https://github.com/llvm/llvm-project/commit/a0de3335edcf19305dad592d21ebe402825184f6.diff

LOG: [clang] Implement VectorType logic not operator.

Differential Revision: https://reviews.llvm.org/D80979
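
A minimal sketch of the new behaviour, assuming C++ and GNU (__vector_size__)
vectors (the type names are illustrative):

  typedef unsigned int uint4 __attribute__((__vector_size__(16)));
  typedef int int4 __attribute__((__vector_size__(16)));

  int4 lanewise_not(uint4 v) {
    // Each lane is compared against zero; the result has the signed variant
    // of the operand type: -1 where the lane was 0, 0 otherwise.
    return !v;
  }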

Added: 
clang/test/CodeGen/vector-logic-not.cpp

Modified: 
clang/docs/LanguageExtensions.rst
clang/lib/CodeGen/CGExprScalar.cpp
clang/lib/Sema/SemaExpr.cpp
clang/test/Sema/vector-gcc-compat.cpp

Removed: 




diff  --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index ba0a7d9cf95c..06ecc186c7dc 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -475,7 +475,7 @@ unary operators +, --yes yes   yes  
   --
 +,--,*,/,%   yes yes   yes --
 bitwise operators &,|,^,~yes yes   yes --
 >>,<, <, >=, <= yes yes   yes --
 =yes yes   yes yes
 ?: [#]_  yes --yes --
@@ -488,7 +488,6 @@ const_cast   no  nono   
   no
 
 See also :ref:`langext-__builtin_shufflevector`, 
:ref:`langext-__builtin_convertvector`.
 
-.. [#] unary operator ! is not implemented, however && and || are.
 .. [#] ternary operator(?:) has different behaviors depending on condition
   operand's vector type. If the condition is a GNU vector (i.e. 
__vector_size__),
   it's only available in C++ and uses normal bool conversions (that is, != 0).

diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 4e61349cf4d5..b2bc38b329ef 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2762,7 +2762,9 @@ Value *ScalarExprEmitter::VisitUnaryNot(const 
UnaryOperator *E) {
 
 Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) {
   // Perform vector logical not on comparison with zero vector.
-  if (E->getType()->isExtVectorType()) {
+  if (E->getType()->isVectorType() &&
+  E->getType()->castAs<VectorType>()->getVectorKind() ==
+  VectorType::GenericVector) {
 Value *Oper = Visit(E->getSubExpr());
 Value *Zero = llvm::Constant::getNullValue(Oper->getType());
 Value *Result;

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 45c1acecbe94..4bec413f3042 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14481,12 +14481,19 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation 
OpLoc,
   return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
   }
+  // Vector logical not returns the signed variant of the operand type.
+  resultType = GetSignedVectorType(resultType);
+  break;
+} else if (Context.getLangOpts().CPlusPlus && resultType->isVectorType()) {
+  const VectorType *VTy = resultType->castAs<VectorType>();
+  if (VTy->getVectorKind() != VectorType::GenericVector)
+return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
+ << resultType << Input.get()->getSourceRange());
+
   // Vector logical not returns the signed variant of the operand type.
   resultType = GetSignedVectorType(resultType);
   break;
 } else {
-  // FIXME: GCC's vector extension permits the usage of '!' with a vector
-  //type in C++. We should allow that here too.
   return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
 << resultType << Input.get()->getSourceRange());
 }

diff  --git a/clang/test/CodeGen/vector-logic-not.cpp 
b/clang/test/CodeGen/vector-logic-not.cpp
new file mode 100644
index ..2ac026711e82
--- /dev/null
+++ b/clang/test/CodeGen/vector-logic-not.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
+
+typedef __attribute__((__vector_size__(16))) float float4;
+typedef __attribute__((__vector_size__(16))) int int4;
+typedef __attribute__((__vector_size__(16))) unsigned int uint4;
+
+// CHECK: @_Z5test1Dv4_j
+int4 test1(uint4 V0) {
+  // CHECK: [[CMP0:%.*]] = icmp eq <4 x i32> [[V0:%.*]], zeroinitializer
+  // CHECK-NEXT: [[V1:%.*]] = sext <4 x i1> [[CMP0]] to <4 x i32>
+  int4 V = !V0;
+  return V;
+}
+
+// CHECK: @_Z5test2Dv4_fS_
+int4 test2(float4 V0, float4 V1) {
+  // CHECK: [[CMP0:%.*]] = fcmp oeq <4 x float> [[V0:%.*]], zeroinitializer
+  // CHECK-NEXT: [[V1:%.*]] = sext <4 x i1> 

[clang] d0f4af8 - [Coroutines] Insert lifetime intrinsics even O0 is used

2020-03-23 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2020-03-24T13:41:55+08:00
New Revision: d0f4af8f3088f72df7fea9983127cbeeebbef6a1

URL: 
https://github.com/llvm/llvm-project/commit/d0f4af8f3088f72df7fea9983127cbeeebbef6a1
DIFF: 
https://github.com/llvm/llvm-project/commit/d0f4af8f3088f72df7fea9983127cbeeebbef6a1.diff

LOG: [Coroutines] Insert lifetime intrinsics even O0 is used

Differential Revision: https://reviews.llvm.org/D76119

Added: 
clang/test/CodeGenCoroutines/coro-always-inline.cpp

Modified: 
clang/lib/CodeGen/BackendUtil.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index e8f2524a25d5..97b97276521d 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -579,8 +579,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
   // At O0 and O1 we only run the always inliner which is more efficient. At
   // higher optimization levels we run the normal inliner.
   if (CodeGenOpts.OptimizationLevel <= 1) {
-bool InsertLifetimeIntrinsics = (CodeGenOpts.OptimizationLevel != 0 &&
- !CodeGenOpts.DisableLifetimeMarkers);
+bool InsertLifetimeIntrinsics = ((CodeGenOpts.OptimizationLevel != 0 &&
+  !CodeGenOpts.DisableLifetimeMarkers) ||
+ LangOpts.Coroutines);
 PMBuilder.Inliner = 
createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics);
   } else {
 // We do not want to inline hot callsites for SamplePGO module-summary 
build
@@ -1176,7 +1177,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
   // which is just that always inlining occurs. Further, disable generating
   // lifetime intrinsics to avoid enabling further optimizations during
   // code generation.
-  MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false));
+  // However, we need to insert lifetime intrinsics to avoid invalid access
+  // caused by multithreaded coroutines.
+  MPM.addPass(
+  AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/LangOpts.Coroutines));
 
   // At -O0, we can still do PGO. Add all the requested passes for
   // instrumentation PGO, if requested.

diff  --git a/clang/test/CodeGenCoroutines/coro-always-inline.cpp 
b/clang/test/CodeGenCoroutines/coro-always-inline.cpp
new file mode 100644
index ..a2e4bba45c0c
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-always-inline.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts 
\
+// RUN:   -fexperimental-new-pass-manager -O0 %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts 
\
+// RUN:   -fexperimental-new-pass-manager -fno-inline -O0 %s -o - | FileCheck 
%s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts 
\
+// RUN:   -O0 %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts 
\
+// RUN:   -fno-inline -O0 %s -o - | FileCheck %s
+
+namespace std {
+namespace experimental {
+
+struct handle {};
+
+struct awaitable {
+  bool await_ready() { return true; }
+  // CHECK-NOT: await_suspend
+  inline void __attribute__((__always_inline__)) await_suspend(handle) {}
+  bool await_resume() { return true; }
+};
+
+template 
+struct coroutine_handle {
+  static handle from_address(void *address) { return {}; }
+};
+
+template 
+struct coroutine_traits {
+  struct promise_type {
+awaitable initial_suspend() { return {}; }
+awaitable final_suspend() { return {}; }
+void return_void() {}
+T get_return_object() { return T(); }
+void unhandled_exception() {}
+  };
+};
+} // namespace experimental
+} // namespace std
+
+// CHECK-LABEL: @_Z3foov
+// CHECK-LABEL: entry:
+// CHECK-NEXT: %this.addr.i{{[0-9]*}} = alloca 
%"struct.std::experimental::awaitable"*, align 8
+// CHECK-NEXT: %this.addr.i{{[0-9]*}} = alloca 
%"struct.std::experimental::awaitable"*, align 8
+// CHECK: [[CAST0:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"** 
%this.addr.i{{[0-9]*}} to i8*
+// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[CAST0]])
+// CHECK: [[CAST1:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"** 
%this.addr.i{{[0-9]*}} to i8*
+// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[CAST1]])
+
+// CHECK: [[CAST2:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"** 
%this.addr.i{{[0-9]*}} to i8*
+// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[CAST2]])
+// CHECK: [[CAST3:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"** 
%this.addr.i{{[0-9]*}} to i8*
+// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[CAST3]])
+void foo() { co_return; }




[clang] 53c2e10 - [Coroutines] Do not evaluate InitListExpr of a co_return

2020-03-15 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2020-03-16T12:42:44+08:00
New Revision: 53c2e10fb8a606aa0cb092dda1219603cc3017cd

URL: 
https://github.com/llvm/llvm-project/commit/53c2e10fb8a606aa0cb092dda1219603cc3017cd
DIFF: 
https://github.com/llvm/llvm-project/commit/53c2e10fb8a606aa0cb092dda1219603cc3017cd.diff

LOG: [Coroutines] Do not evaluate InitListExpr of a co_return

Differential Revision: https://reviews.llvm.org/D76118

Added: 
clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp

Modified: 
clang/lib/CodeGen/CGCoroutine.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGCoroutine.cpp 
b/clang/lib/CodeGen/CGCoroutine.cpp
index aee5a927a055..5c57ad0685d5 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -275,9 +275,9 @@ RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr 
,
 void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) {
   ++CurCoro.Data->CoreturnCount;
   const Expr *RV = S.getOperand();
-  if (RV && RV->getType()->isVoidType()) {
-// Make sure to evaluate the expression of a co_return with a void
-// expression for side effects.
+  if (RV && RV->getType()->isVoidType() && !isa<InitListExpr>(RV)) {
+// Make sure to evaluate the non initlist expression of a co_return
+// with a void expression for side effects.
 RunCleanupsScope cleanupScope(*this);
 EmitIgnoredExpr(RV);
   }

diff  --git a/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp 
b/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp
new file mode 100644
index ..8526b6f8a8da
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++1z 
-emit-llvm %s -o - -disable-llvm-passes | FileCheck %s
+
+namespace std {
+template 
+struct b { b(int, a); };
+template 
+struct c {};
+namespace experimental {
+template 
+struct coroutine_traits : d {};
+template 
+struct coroutine_handle;
+template <>
+struct coroutine_handle<> {};
+template 
+struct coroutine_handle : coroutine_handle<> {
+  static coroutine_handle from_address(void *);
+};
+struct e {
+  int await_ready();
+  void await_suspend(coroutine_handle<>);
+  void await_resume();
+};
+} // namespace experimental
+} // namespace std
+template 
+auto ah(ag) { return ag().ah(0); }
+template 
+struct f;
+struct g {
+  struct h {
+int await_ready();
+template 
+void await_suspend(std::experimental::coroutine_handle);
+void await_resume();
+  };
+  std::experimental::e initial_suspend();
+  h final_suspend();
+  template 
+  auto await_transform(ag) { return ah(ag()); }
+};
+struct j : g {
+  f>> get_return_object();
+  void return_value(std::b>);
+  void unhandled_exception();
+};
+struct k {
+  k(std::experimental::coroutine_handle<>);
+  int await_ready();
+};
+template 
+struct f {
+  using promise_type = j;
+  std::experimental::coroutine_handle<> ar;
+  struct l : k {
+using at = k;
+l(std::experimental::coroutine_handle<> m) : at(m) {}
+void await_suspend(std::experimental::coroutine_handle<>);
+  };
+  struct n : l {
+n(std::experimental::coroutine_handle<> m) : l(m) {}
+am await_resume();
+  };
+  auto ah(int) { return n(ar); }
+};
+template 
+auto ax(std::c, aw) -> f>;
+template 
+struct J { static f>> bo(); };
+// CHECK-LABEL: _ZN1JIiE2boEv(
+template 
+f>> J::bo() {
+  std::c bu;
+  int bw(0);
+  // CHECK: void @_ZN1j12return_valueESt1bISt1cIiiEE(%struct.j* %__promise)
+  co_return{0, co_await ax(bu, bw)};
+}
+void bh() {
+  auto cn = [] { J::bo; };
+  cn();
+}





[clang] 624dbfc - [Coroutines][New pass manager] Move CoroElide pass to right position

2020-03-01 Thread Jun Ma via cfe-commits

Author: Jun Ma
Date: 2020-03-01T21:48:24+08:00
New Revision: 624dbfcc1b81520d2211d43a759f817d0b131de0

URL: 
https://github.com/llvm/llvm-project/commit/624dbfcc1b81520d2211d43a759f817d0b131de0
DIFF: 
https://github.com/llvm/llvm-project/commit/624dbfcc1b81520d2211d43a759f817d0b131de0.diff

LOG: [Coroutines][New pass manager] Move CoroElide pass to right position

Differential Revision: https://reviews.llvm.org/D75345

Added: 


Modified: 
clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/test/Transforms/Coroutines/ex2.ll
llvm/test/Transforms/Coroutines/ex3.ll

Removed: 




diff  --git a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp 
b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
index aed2cf13f892..cea71a1acc6b 100644
--- a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
+++ b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
@@ -14,14 +14,14 @@
 // The first coro-split pass enqueues a second run of the entire CGSCC 
pipeline.
 // CHECK: Starting CGSCC pass manager run.
 // CHECK: Running pass: CoroSplitPass on (_Z3foov)
-// CHECK: Running pass:{{.*}}CoroElidePass{{.*}} on (_Z3foov)
+// CHECK: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}}
 // CHECK: Finished CGSCC pass manager run.
 //
 // The second coro-split pass splits coroutine 'foo' into funclets
 // 'foo.resume', 'foo.destroy', and 'foo.cleanup'.
 // CHECK: Starting CGSCC pass manager run.
 // CHECK: Running pass: CoroSplitPass on (_Z3foov)
-// CHECK: Running pass:{{.*}}CoroElidePass{{.*}} on (_Z3foov)
+// CHECK: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}}
 // CHECK: Finished CGSCC pass manager run.
 //
 // CHECK: Running pass:{{.*}}CoroCleanupPass

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f4806b2db3c8..eb5b3a61fa89 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -561,6 +561,9 @@ 
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
 EnableMSSALoopDependency, DebugLogging));
   }
 
+  if (PTO.Coroutines)
+FPM.addPass(CoroElidePass());
+
   for (auto &C : ScalarOptimizerLateEPCallbacks)
 C(FPM, Level);
 
@@ -847,10 +850,8 @@ 
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
 
   MainCGPipeline.addPass(AttributorCGSCCPass());
 
-  if (PTO.Coroutines) {
+  if (PTO.Coroutines)
 MainCGPipeline.addPass(CoroSplitPass());
-MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass()));
-  }
 
   // Now deduce any function attributes based in the current code.
   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());

diff  --git a/llvm/test/Transforms/Coroutines/ex2.ll 
b/llvm/test/Transforms/Coroutines/ex2.ll
index cd7d8d2a20ed..584bc909a4eb 100644
--- a/llvm/test/Transforms/Coroutines/ex2.ll
+++ b/llvm/test/Transforms/Coroutines/ex2.ll
@@ -40,8 +40,14 @@ entry:
   %hdl = call i8* @f(i32 4)
   call void @llvm.coro.resume(i8* %hdl)
   call void @llvm.coro.resume(i8* %hdl)
+  %to = icmp eq i8* %hdl, null
+  br i1 %to, label %return, label %destroy
+destroy:
   call void @llvm.coro.destroy(i8* %hdl)
+  br label %return
+return:
   ret i32 0
+; CHECK-NOT:  call i8* @CustomAlloc
 ; CHECK:  call void @print(i32 4)
 ; CHECK-NEXT: call void @print(i32 5)
 ; CHECK-NEXT: call void @print(i32 6)

diff  --git a/llvm/test/Transforms/Coroutines/ex3.ll 
b/llvm/test/Transforms/Coroutines/ex3.ll
index 50ce19e26372..85cf53fb576d 100644
--- a/llvm/test/Transforms/Coroutines/ex3.ll
+++ b/llvm/test/Transforms/Coroutines/ex3.ll
@@ -6,11 +6,17 @@ define i8* @f(i32 %n) {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
   %alloc = call i8* @malloc(i32 %size)
-  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
   br label %loop
 loop:
-  %n.val = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
+  %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop.resume ]
   call void @print(i32 %n.val) #4
   %0 = call i8 @llvm.coro.suspend(token none, i1 false)
   switch i8 %0, label %suspend [i8 0, label %loop.resume
@@ -37,8 +43,15 @@ entry:
   %hdl = call i8* @f(i32 4)
   call void @llvm.coro.resume(i8* %hdl)
   call void @llvm.coro.resume(i8* %hdl)
+  %c = ptrtoint i8* %hdl to i64
+  %to = icmp eq i64 %c, 0
+  br i1 %to, label %return, label %destroy
+destroy:
   call void @llvm.coro.destroy(i8* %hdl)
+  br label %return
+return:
   ret i32 0
+; CHECK-NOT:  i8* @malloc
 ; CHECK:  call void @print(i32 4)
 ; CHECK-NEXT: call void @print(i32 -5)
 ;