@@ -1953,13 +1966,22 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue,
LValue dest,
}
// Okay, we're doing this natively.
-llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
+llvm::Value *ValToStore =
+atomics.convertRValueToInt(rvalue,
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
I think some of the AMDGPUUsage work-groups
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
The backend facing parts seem more
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
Ugh. The ISA manuals usually use "workgroup".
@@ -137,6 +137,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
arsenm wrote:
s/work_groups/workgroup/
@@ -1312,6 +1312,11 @@ The AMDGPU backend supports the following LLVM IR
attributes.
the frame. This is an internal
detail of how LDS variables are lowered,
language front ends should
@@ -494,6 +494,14 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const
MachineFunction ,
Kern[".max_flat_workgroup_size"] =
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
+ unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
+ unsigned NumWGY =
@@ -6826,6 +6826,10 @@ def warn_floatingpoint_eq : Warning<
"comparing floating point with == or != is unsafe">,
InGroup>, DefaultIgnore;
+def warn_fenv_access : Warning<
+ "floating point environment access without #pragma STDC FENV_ACCESS set ON">,
+ InGroup>;
@@ -18057,6 +18057,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned
BuiltinID,
/*ReturnType*/ Op0->getType(), Intrinsic::dx_frac,
ArrayRef{Op0}, nullptr, "dx.frac");
}
+ case Builtin::BI__builtin_hlsl_elementwise_rcp: {
+Value *Op0 =
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck
%s
+//
+// Test that floating point atomic stores and loads do not get casted to/from
+// integer.
+
+#include
+
+_Atomic float Af;
+_Atomic double Ad;
+_Atomic long double Ald;
+
@@ -1453,9 +1457,11 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value
*AddForLoaded,
}
llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
- bool IsVolatile) {
+ bool
@@ -1453,9 +1457,11 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value
*AddForLoaded,
}
llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
- bool IsVolatile) {
+ bool
@@ -6826,6 +6826,10 @@ def warn_floatingpoint_eq : Warning<
"comparing floating point with == or != is unsafe">,
InGroup>, DefaultIgnore;
+def warn_fenv_access : Warning<
+ "floating point environment access without #pragma STDC FENV_ACCESS set ON">,
+ InGroup>;
@@ -0,0 +1,77 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck
--check-prefix=ERROR %s
+
+; ERROR: error: can't parse integer attribute -1 in amdgpu-max-num-work-groups
+define amdgpu_kernel void @empty_max_num_work_groups_neg_num1() #21 {
+entry:
@@ -194,3 +204,105 @@ __global__ void non_cexpr_waves_per_eu_2() {}
// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to
be an integer constant}}
__attribute__((amdgpu_waves_per_eu(2, ipow2(2
__global__ void non_cexpr_waves_per_eu_2_4() {}
+
+//
@@ -139,6 +139,36 @@ kernel void
reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
// CHECK: define{{.*}} amdgpu_kernel void
@reqd_work_group_size_32_2_1_flat_work_group_size_16_128()
[[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
}
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/83927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
I think we would be better off teaching an IR optimizer pass to recognize the
divide pattern and remap it to the load from the location, rather than forcing
the complexity into every frontend
https://github.com/llvm/llvm-project/pull/83927
@@ -839,6 +839,18 @@ unsigned test_wavefrontsize() {
return __builtin_amdgcn_wavefrontsize();
}
+// CHECK-LABEL test_get_fpenv(
arsenm wrote:
This interface is 1 level removed from the hardware, but this cannot be
legitimately used without fenv access
@@ -839,6 +839,18 @@ unsigned test_wavefrontsize() {
return __builtin_amdgcn_wavefrontsize();
}
+// CHECK-LABEL test_get_fpenv(
arsenm wrote:
It's a standard C concept. You need to enable #pragma STDC FENV_ACCESS ON to do
anything valid with the floating
@@ -839,6 +839,18 @@ unsigned test_wavefrontsize() {
return __builtin_amdgcn_wavefrontsize();
}
+// CHECK-LABEL test_get_fpenv(
arsenm wrote:
Ideally we would also warn if you used these without fenv access enabled
@@ -6,32 +6,32 @@
// R600-based processors.
//
-// RUN: %clang -E -dM -target r600 -mcpu=r600 %s 2>&1 | FileCheck
--check-prefixes=ARCH-R600,R600 %s -DCPU=r600
-// RUN: %clang -E -dM -target r600 -mcpu=rv630 %s 2>&1 | FileCheck
--check-prefixes=ARCH-R600,R600 %s -DCPU=r600
@@ -292,8 +292,14 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions
,
}
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
+
+ // Don't emit feature macros in host code because in such cases the
+ // feature list is not accurate.
+ if
@@ -1410,13 +1414,14 @@ RValue
AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
auto *ValTy = AsValue
? CGF.ConvertTypeForMem(ValueTy)
: getAtomicAddress().getElementType();
-if (ValTy->isIntegerTy()) {
-
@@ -1410,13 +1414,14 @@ RValue
AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
auto *ValTy = AsValue
? CGF.ConvertTypeForMem(ValueTy)
: getAtomicAddress().getElementType();
-if (ValTy->isIntegerTy()) {
-
https://github.com/arsenm commented:
I don't really understand all of these manual scheduling control intrinsics.
This brings the total up to 4? I think effort would be better spent making the
scheduler better instead of giving users more footguns to shoot themselves (and
the compiler) with
@@ -2561,6 +2567,70 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const
SIMemOpInfo ,
return Changed;
}
+bool SIMemoryLegalizer::GFX9InsertWaitcntForPreciseMem(MachineFunction ) {
+ const GCNSubtarget = MF.getSubtarget();
+ const SIInstrInfo *TII =
arsenm wrote:
> @arsenm That makes sense, I don't think MMRA fits the fine-grained use case
> either. Does that mean we can stick with the approach from this PR? @b-sumner
> mentioned there was another similar approach being worked on.
Something like this, but the naming and direction of this
@@ -170,20 +173,46 @@ static Value *appendString(IRBuilder<> , Value
*Desc, Value *Arg,
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
+static Value *appendVectorArg(IRBuilder<> , Value *Desc, Value *Arg,
arsenm wrote:
These are all still
@@ -202,12 +207,20 @@ RValue
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
Args.push_back(Arg);
}
- llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
- IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+
@@ -202,12 +207,20 @@ RValue
CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
Args.push_back(Arg);
}
- llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
- IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+
https://github.com/arsenm requested changes to this pull request.
https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -406,5 +410,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb",
"nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
+// OpenCL
@@ -3616,6 +3617,12 @@ unsigned FunctionDecl::getBuiltinID(bool
ConsiderWrapperFunctions) const {
if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
return 0;
+ // AMDGCN implementation supports printf as a builtin
+ // for OpenCL
+ if
@@ -2550,6 +2550,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
().getLongDoubleFormat() == ::APFloat::IEEEquad())
BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
+ // Mutate the printf builtin ID so that we use the same
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/72556
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
reverse ping
https://github.com/llvm/llvm-project/pull/71019
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> In this case, MMRAs would only help in the sense that you won't need any new
> attributes and can just add an MMRA such as `atomic-lowering:fine-grained`.
> It's not really what MMRAs were made for (because this attribute doesn't
> affect semantics, just lowering style I
arsenm wrote:
> But the shared library stuff isn't an issue for AMDGPU, right?
No, we don't support shared library linking yet
https://github.com/llvm/llvm-project/pull/80475
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -137,6 +137,11 @@ Removed Compiler Flags
Attribute Changes in Clang
--
+- Introduced a new function attribute
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
+ ``[[clang::amdgpu_max_num_work_groups(x, y, z)]]`` for the AMDGPU target.
https://github.com/arsenm requested changes to this pull request.
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
arsenm wrote:
Missing a test for the various IR attribute parsing error conditions
https://github.com/llvm/llvm-project/pull/79035
@@ -194,3 +204,105 @@ __global__ void non_cexpr_waves_per_eu_2() {}
// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to
be an integer constant}}
__attribute__((amdgpu_waves_per_eu(2, ipow2(2
__global__ void non_cexpr_waves_per_eu_2_4() {}
+
+//
@@ -139,6 +139,36 @@ kernel void
reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
// CHECK: define{{.*}} amdgpu_kernel void
@reqd_work_group_size_32_2_1_flat_work_group_size_16_128()
[[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
}
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Attribute not specified.
+; CHECK-LABEL: {{^}}empty_no_attribute:
+define amdgpu_kernel void @empty_no_attribute() {
+entry:
+ ret void
+}
+
+; Ignore if number of work groups for x
@@ -356,6 +356,24 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getMaxNumWorkGroupsX()
+
@@ -814,6 +814,15 @@ bool shouldEmitConstantsToTextSection(const Triple );
/// to integer.
int getIntegerAttribute(const Function , StringRef Name, int Default);
+/// \returns Unsigned Integer value requested using \p F's \p Name attribute.
+///
+/// \returns \p Default if
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2714,7 +2714,17 @@ static void setLinkageForGV(llvm::GlobalValue *GV, const
NamedDecl *ND) {
void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F) {
- // Only if we are
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
arsenm wrote:
> So overall, the practical effect of the `denormal-fp-math` attribute being
> set incorrectly doesn't appear to matter.
It matters more for AMDGPU, where we need to care because some instructions
just don't respect denormals. We legalize some operations differently depending
https://github.com/arsenm commented:
Title should be rephrased; this doesn't have anything to do with inlining
https://github.com/llvm/llvm-project/pull/81058
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/78759
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/78759
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2067,6 +2067,10 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type
*DstTy,
Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
+ // Skip cast if types are identical
arsenm wrote:
We're gradually converging on something that looks like this, subject to bike
shedding the name
https://github.com/llvm/llvm-project/pull/69229
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
arsenm wrote:
> Sorry, I should have clearly mentioned that. Yes, it is for my followup
> change #80908. In #80908, we changed the type of LLVM builtin but kept the
> corresponding clang builtin unchanged to avoid breaking existing uses.
Don't see how that could be related; you can
@@ -356,6 +356,19 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getNumWorkGroupsX();
+uint32_t Y =
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/81108
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
Add to release notes?
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
Also, this should have a follow-up patch to propagate in AMDGPUAttributor, and another
to lower to !range in AMDGPULowerKernelAttributes
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -2031,6 +2031,13 @@ def AMDGPUNumVGPR : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}
+def AMDGPUNumWorkGroups : InheritableAttr {
+ let Spellings = [Clang<"amdgpu_num_work_groups", 0>];
arsenm wrote:
@@ -356,6 +356,19 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getNumWorkGroupsX();
+uint32_t Y =
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s
| FileCheck %s
arsenm wrote:
Don't need -verify-machineinstrs
https://github.com/llvm/llvm-project/pull/79035
___
@@ -1108,3 +1108,8 @@ void
GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}
+
+SmallVector
arsenm wrote:
std::array<3>?
@@ -356,6 +356,19 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getNumWorkGroupsX();
+uint32_t Y =
https://github.com/arsenm commented:
Needs documentation in AMDGPUUsage. Should also clarify behavior of 0
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}
+ case Intrinsic::readfixedtimer: {
+errs() << "WARNING: this target does not support the
@@ -3759,7 +3759,6 @@ def CALL_PROTOTYPE :
include "NVPTXIntrinsics.td"
-
arsenm wrote:
Random whitespace change
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
https://github.com/arsenm commented:
Missing the clang builtin test
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/76955
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81083
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5908,7 +5908,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
}
}
-assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
+assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/81083
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -167,6 +167,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
"Enable CU wavefront execution mode"
>;
+def FeaturePreciseMemory
arsenm wrote:
The subtarget feature prefix should be removed. The subtarget feature name is
not the user facing component
@@ -0,0 +1,199 @@
+; Testing the -amdgpu-precise-memory-op option
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+amdgpu-precise-memory-op
-verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -mattr=+amdgpu-precise-memory-op
https://github.com/arsenm commented:
I think this needs codegen tests for the gfx900 vs. gfx906 mad_mix/fma_mix
issue
https://github.com/llvm/llvm-project/pull/76955
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -2819,11 +2819,11 @@ def int_amdgcn_fdot2_f16_f16 :
def int_amdgcn_fdot2_bf16_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
DefaultAttrsIntrinsic<
-[llvm_i16_ty], // %r
+[llvm_bfloat_ty], // %r
arsenm wrote:
Changing the clang
@@ -2835,8 +2835,8 @@ def int_amdgcn_fdot2_f32_bf16 :
DefaultAttrsIntrinsic<
[llvm_float_ty], // %r
[
- llvm_v2i16_ty, // %a
- llvm_v2i16_ty, // %b
+ llvm_v2bf16_ty, // %a
+ llvm_v2bf16_ty, // %b
arsenm wrote:
For potential
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User ,
bool IRTranslator::translateCast(unsigned Opcode, const User ,
MachineIRBuilder ) {
- if (U.getType()->getScalarType()->isBFloatTy() ||
-
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
arsenm wrote:
does this help with #79369 at all?
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User ,
bool IRTranslator::translateCast(unsigned Opcode, const User ,
MachineIRBuilder ) {
- if (U.getType()->getScalarType()->isBFloatTy() ||
-
arsenm wrote:
Next piece in #81108
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/81108
This completes the unrevert of ef388334ee5a3584255b9ef5b3fefdb244fa3fd7.
>From 7b5b50597e13c647ec70beab35dcc9b643bff42f Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 8 Feb 2024 14:15:33 +0530
Subject:
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH 1/4] Reapply "InstCombine: Introduce
SimplifyDemandedUseFPClass""
@@ -0,0 +1,273 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-cpu x86-64-v4
-std=c23 -O1 -ffreestanding -emit-llvm -o - %s | FileCheck %s
+
+//
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -Wno-varargs -O1
-disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt
-passes="expand-variadics,default" -S | FileCheck %s
--check-prefixes=CHECK,X86Linux
arsenm wrote:
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
401 - 500 of 1140 matches
Mail list logo