https://github.com/arsenm requested changes to this pull request.
There should be no need to introduce same-sized value casts, whether bitcast or
ptrtoint in either legalizer
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing
@@ -6086,6 +6086,62 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,25 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+//RUN: %clang_cc1 %s -emit-llvm -O1 -o - | FileCheck %s
arsenm wrote:
codegen tests need an explicit target
@@ -6086,6 +6086,62 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -780,14 +780,22 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32",
VOP_I32_I32_I32_ARITH, null_frag,
// These are special and do not read the exec mask.
let isConvergent = 1, Uses = [] in {
-def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
- [(set
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/92294
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> It's still used:
>
> ```
> /work/kparzysz/git/llvm.org/mlir/lib/Target/LLVM/ROCDL/Target.cpp: In member
> function ‘std::optional >
> mlir::ROCDL::SerializeGPUModuleBase::assembleIsa(llvm::StringRef)’:
> /work/kparzysz/git/llvm.org/mlir/lib/Target/LLVM/ROCDL/Target.cpp:302:15:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/92232
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1586,6 +1586,12 @@ class CodeGenModule : public CodeGenTypeCache {
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
@@ -6086,6 +6086,68 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -3400,7 +3400,7 @@ def : GCNPat<
// FIXME: Should also do this for readlane, but tablegen crashes on
// the ignored src1.
def : GCNPat<
- (int_amdgcn_readfirstlane (i32 imm:$src)),
+ (i32 (AMDGPUreadfirstlane (i32 imm:$src))),
arsenm wrote:
We might need
@@ -5387,6 +5387,212 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5387,6 +5387,212 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -58,7 +58,7 @@ CHECK-CNT3-NOT: {{^}}this is duplicate
CHECK-CNT4-COUNT-5: this is duplicate
CHECK-CNT4-EMPTY:
-Many-label:
+Many-LABEL:
arsenm wrote:
I would be careful about touching FileCheck tests. The point might be the wrong
label
@@ -4408,6 +4409,42 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+*
https://github.com/arsenm commented:
amdgpu changes lgtm
https://github.com/llvm/llvm-project/pull/91854
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> > (You can even place `.quad sym[0].hash; .long sym[0].size` in a section
> > `SHF_LINK_ORDER` linking to the global variable for linker garbage
> > collection.)
> > The runtime can build a map correlating hashes to sizes, which can be used
> > to answer variable size queries.
@@ -2176,26 +2176,23 @@ def int_amdgcn_wave_reduce_umin : AMDGPUWaveReduce;
def int_amdgcn_wave_reduce_umax : AMDGPUWaveReduce;
def int_amdgcn_readfirstlane :
- ClangBuiltin<"__builtin_amdgcn_readfirstlane">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__
@@ -2658,21 +2676,102 @@
IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
return InvertedMask;
}
+void IGroupLPDAGMutation::addSchedGroupBarrierRules() {
+
+ /// Whether or not the instruction has no true data predecessors
+ /// with opcode \p
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/85304
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1284,7 +1284,29 @@ The AMDGPU backend implements the following LLVM IR
intrinsics.
| ``// 5 MFMA``
|
``__builtin_amdgcn_sched_group_barrier(8, 5, 0)``
-
https://github.com/arsenm commented:
I don't understand how anyone is supposed to use this. This is exposing
extremely specific, random low level details of the scheduling. Users claim
they want scheduling controls, but what they actually want is the scheduler to
just do the right thing. We
@@ -247,7 +247,7 @@ Address CodeGen::emitMergePHI(CodeGenFunction , Address
Addr1,
bool CodeGen::isEmptyField(ASTContext , const FieldDecl *FD,
bool AllowArrays, bool AsIfNoUniqueAddr) {
- if (FD->isUnnamedBitField())
+ if
@@ -157,7 +157,7 @@ llvm::Value
*CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction ,
llvm::Value *RoundUp = CGF.Builder.CreateConstInBoundsGEP1_32(
CGF.Builder.getInt8Ty(), Ptr, Align.getQuantity() - 1);
return CGF.Builder.CreateIntrinsic(
-
@@ -24,6 +24,7 @@ MODULE_PASS("amdgpu-lower-ctor-dtor",
AMDGPUCtorDtorLoweringPass())
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-unify-metadata",
@@ -0,0 +1,55 @@
+/*=== __clang_hip_device_macro_guards.h - guards for HIP device macros -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier:
@@ -4408,6 +4409,42 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+*
@@ -1,22 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+// RUN:
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/90925
>From 4760ebce0ff7725f4bb75f5107f551d867e4db6d Mon Sep 17 00:00:00 2001
From: Ellis Hoag
Date: Thu, 2 May 2024 17:47:38 -0700
Subject: [PATCH 1/4] [modules] Accept equivalent module caches from different
symlink
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
@@ -1586,6 +1586,12 @@ class CodeGenModule : public CodeGenTypeCache {
void AddGlobalDtor(llvm::Function *Dtor, int
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/88918
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
@@ -504,3 +508,16 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1,
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5982,6 +5982,68 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]])
arsenm wrote:
> ping Ping Do you have another review comment?
This has now confused me. You should roll back to the case where you only
changed the scalar behavior. Any vector behavior change should be a separate
PR, if that is even correct. I would still like to know what the gcc behavior
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/90994
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -504,3 +508,15 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1,
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
arsenm wrote:
> I'm now wondering if adding a new builtin is needed at all, or if it should
> just be part of the original builtin? It's an additive change.
Maybe?
>
> Should we also rename the MMRA to `amdgpu-fence-as` (remove OpenCL from the
> name) ?
>
I definitely do not want to
@@ -316,3 +316,82 @@ define <2 x i32> @test_trunc_both_reversed_vector(<2 x
i64> %a) {
%res = trunc nsw nuw <2 x i64> %a to <2 x i32>
ret <2 x i32> %res
}
+
+define ptr @gep_nuw(ptr %p, i64 %idx) {
+; CHECK: %gep = getelementptr nuw i8, ptr %p, i64 %idx
+ %gep =
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89477
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1462,6 +1460,14 @@ floating point semantic models: precise (the default),
strict, and fast.
"allow_approximate_fns", "off", "off", "on"
"allow_reassociation", "off", "off", "on"
+The ``-fp-model`` option does not modify the "fdenormal-fp-math" or
@@ -18319,6 +18320,26 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned
BuiltinID,
return nullptr;
}
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+llvm::Value *ASMask) {
+ constexpr const
@@ -4403,6 +4404,60 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence for
@@ -1906,7 +1909,15 @@ Value
*ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
} else {
assert(SrcEltTy->isFloatingPointTy() && DstEltTy->isFloatingPointTy() &&
"Unknown real conversion");
-if (DstEltTy->getTypeID() <
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]])
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
It is essential that clang should know about libclc. libclc exists purely as an
extension of the compiler. From the user perspective the opencl
@@ -0,0 +1,165 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]])
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80475
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> Some of the GPU targets, IIRC, want daz/ftz by default. Not all targets have
> DAZ/FTZ bits that can be set; I think RISC-V is in this category, although to
> be honest, trying to track down all the ISA extensions to make sure is a bit
> beyond my ken.
>
OpenCL allows you to
@@ -842,25 +842,6 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList
,
ToolChain::addProfileRTLibs(Args, CmdArgs);
}
-llvm::DenormalMode
-Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList ,
- const JobAction ,
-
@@ -18763,19 +18763,28 @@ Value
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
// scheduling builtins
case AMDGPU::BI__builtin_amdgcn_sched_group_barrier: {
-return E->getNumArgs() == 3
- ? Builder.CreateCall(
-
@@ -18763,19 +18763,28 @@ Value
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
// scheduling builtins
case AMDGPU::BI__builtin_amdgcn_sched_group_barrier: {
-return E->getNumArgs() == 3
- ? Builder.CreateCall(
-
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu
gfx90a \
+// RUN: -verify -S -o - %s
+
arsenm wrote:
-verify tests belong in Sema, there's no codegen here
https://github.com/llvm/llvm-project/pull/85304
@@ -2747,23 +2749,32 @@ void
IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
int32_t SGMask = SGB.getOperand(0).getImm();
int32_t Size = SGB.getOperand(1).getImm();
int32_t SyncID = SGB.getOperand(2).getImm();
- std::optional RuleID =
+ std::optional RuleMask
@@ -437,16 +437,18 @@ void test_sched_group_barrier()
}
// CHECK-LABEL: @test_sched_group_barrier_rule
-// CHECK: call void @llvm.amdgcn.sched.group.barrier.rule(i32 0, i32 1, i32 2,
i32 0)
-// CHECK: call void @llvm.amdgcn.sched.group.barrier.rule(i32 1, i32 2, i32 4,
i32
@@ -69,6 +69,7 @@ BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
+BUILTIN(__builtin_amdgcn_masked_fence, "vUiUicC*", "n")
@@ -18319,6 +18320,26 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned
BuiltinID,
return nullptr;
}
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+llvm::Value *ASMask) {
+ constexpr const
@@ -0,0 +1,294 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \
+// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
+// RUN: -o - | FileCheck %s
+
+// RUN:
@@ -54,3 +56,289 @@ void SPIRV64TargetInfo::getTargetDefines(const LangOptions
,
BaseSPIRVTargetInfo::getTargetDefines(Opts, Builder);
DefineStd(Builder, "SPIRV64", Opts);
}
+
+static constexpr Builtin::Info BuiltinInfo[] = {
+#define BUILTIN(ID, TYPE, ATTRS)
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89803
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89756
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89756
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -88,10 +88,25 @@ function(link_bc)
${ARGN}
)
+ set( LINK_INPUT_ARG ${ARG_INPUTS} )
+ if( WIN32 OR CYGWIN )
+# Create a response file in case the number of inputs exceeds command-line
+# character limits on certain platforms.
+file( TO_CMAKE_PATH
@@ -1431,9 +1431,13 @@ Value *ScalarExprEmitter::EmitScalarCast(Value *Src,
QualType SrcType,
return Builder.CreateFPToUI(Src, DstTy, "conv");
}
- if (DstElementTy->getTypeID() < SrcElementTy->getTypeID())
+ if ((DstElementTy->is16bitFPTy() &&
https://github.com/arsenm approved this pull request.
LGTM. Would be good to verify the vector case is "correct" in as far as it's
what GCC does
https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half
https://github.com/arsenm approved this pull request.
I think this works for the test. I'm slightly confused by the PR not-stacking
with the test changes on top of the other PR
https://github.com/llvm/llvm-project/pull/89687
___
cfe-commits mailing
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -4822,6 +4822,111 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
,
return RetBB;
}
+static MachineBasicBlock *lowerPseudoLaneOp(MachineInstr ,
arsenm wrote:
No, that's a generic pass. I would directly handle this in the legalizer, in
Author: Matt Arsenault
Date: 2024-04-22T11:35:09+02:00
New Revision: bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc
URL:
https://github.com/llvm/llvm-project/commit/bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc
DIFF:
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half
201 - 300 of 1149 matches
Mail list logo