@@ -4408,6 +4409,42 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+* `
@@ -1,22 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+// RUN: -tr
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/90925
>From 4760ebce0ff7725f4bb75f5107f551d867e4db6d Mon Sep 17 00:00:00 2001
From: Ellis Hoag
Date: Thu, 2 May 2024 17:47:38 -0700
Subject: [PATCH 1/4] [modules] Accept equivalent module caches from different
symlink
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
@@ -1586,6 +1586,12 @@ class CodeGenModule : public CodeGenTypeCache {
void AddGlobalDtor(llvm::Function *Dtor, int Priority
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/88918
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
cfe-commit
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
cfe-commit
@@ -504,3 +508,16 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1, node:$
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5982,6 +5982,68 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]]) #[
arsenm wrote:
> ping Ping Do you have another review comment?
This has now confused me. You should roll back to the case where you only
changed the scalar behavior. Any vector behavior change should be a separate
PR, if that is even correct. I would still like to know what the gcc behavior
is
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/90994
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -504,3 +508,15 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1, node:$
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ Selection
arsenm wrote:
> I'm now wondering if adding a new builtin is needed at all, or if it should
> just be part of the original builtin? It's an additive change.
Maybe?
>
> Should we also rename the MMRA to `amdgpu-fence-as` (remove OpenCL from the
> name) ?
>
I definitely do not want to mainta
@@ -316,3 +316,82 @@ define <2 x i32> @test_trunc_both_reversed_vector(<2 x
i64> %a) {
%res = trunc nsw nuw <2 x i64> %a to <2 x i32>
ret <2 x i32> %res
}
+
+define ptr @gep_nuw(ptr %p, i64 %idx) {
+; CHECK: %gep = getelementptr nuw i8, ptr %p, i64 %idx
+ %gep = getelemen
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89477
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1462,6 +1460,14 @@ floating point semantic models: precise (the default),
strict, and fast.
"allow_approximate_fns", "off", "off", "on"
"allow_reassociation", "off", "off", "on"
+The ``-fp-model`` option does not modify the "fdenormal-fp-math" or
+"fdenormal-fp-math-f
@@ -18319,6 +18320,26 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned
BuiltinID,
return nullptr;
}
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+llvm::Value *ASMask) {
+ constexpr const ch
@@ -4403,6 +4404,60 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence for all
@@ -1906,7 +1909,15 @@ Value
*ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
} else {
assert(SrcEltTy->isFloatingPointTy() && DstEltTy->isFloatingPointTy() &&
"Unknown real conversion");
-if (DstEltTy->getTypeID() < SrcEltTy->getTypeID()
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]]) #[
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
It is essential that clang should know about libclc. libclc exists purely as an
extension of the compiler. From the user perspective the opencl builti
@@ -0,0 +1,165 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]]) #
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80475
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> Some of the GPU targets, IIRC, want daz/ftz by default. Not all targets have
> DAZ/FTZ bits that can be set; I think RISC-V is in this category, although to
> be honest, trying to track down all the ISA extensions to make sure is a bit
> beyond my ken.
>
OpenCL allows you to
@@ -842,25 +842,6 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList
&Args,
ToolChain::addProfileRTLibs(Args, CmdArgs);
}
-llvm::DenormalMode
-Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs,
- const JobAct
@@ -18763,19 +18763,28 @@ Value
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
// scheduling builtins
case AMDGPU::BI__builtin_amdgcn_sched_group_barrier: {
-return E->getNumArgs() == 3
- ? Builder.CreateCall(
- CGM.getIn
@@ -18763,19 +18763,28 @@ Value
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
// scheduling builtins
case AMDGPU::BI__builtin_amdgcn_sched_group_barrier: {
-return E->getNumArgs() == 3
- ? Builder.CreateCall(
- CGM.getIn
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu
gfx90a \
+// RUN: -verify -S -o - %s
+
arsenm wrote:
-verify tests belong in Sema, there's no codegen here
https://github.com/llvm/llvm-project/pull/85304
__
@@ -2747,23 +2749,32 @@ void
IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
int32_t SGMask = SGB.getOperand(0).getImm();
int32_t Size = SGB.getOperand(1).getImm();
int32_t SyncID = SGB.getOperand(2).getImm();
- std::optional RuleID =
+ std::optional RuleMask
@@ -437,16 +437,18 @@ void test_sched_group_barrier()
}
// CHECK-LABEL: @test_sched_group_barrier_rule
-// CHECK: call void @llvm.amdgcn.sched.group.barrier.rule(i32 0, i32 1, i32 2,
i32 0)
-// CHECK: call void @llvm.amdgcn.sched.group.barrier.rule(i32 1, i32 2, i32 4,
i32 0
@@ -69,6 +69,7 @@ BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
+BUILTIN(__builtin_amdgcn_masked_fence, "vUiUicC*", "n")
-
@@ -18319,6 +18320,26 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned
BuiltinID,
return nullptr;
}
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+llvm::Value *ASMask) {
+ constexpr const ch
@@ -0,0 +1,294 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \
+// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
+// RUN: -o - | FileCheck %s
+
+// RUN: %clang_
@@ -54,3 +56,289 @@ void SPIRV64TargetInfo::getTargetDefines(const LangOptions
&Opts,
BaseSPIRVTargetInfo::getTargetDefines(Opts, Builder);
DefineStd(Builder, "SPIRV64", Opts);
}
+
+static constexpr Builtin::Info BuiltinInfo[] = {
+#define BUILTIN(ID, TYPE, ATTRS)
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89803
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89756
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89756
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -88,10 +88,25 @@ function(link_bc)
${ARGN}
)
+ set( LINK_INPUT_ARG ${ARG_INPUTS} )
+ if( WIN32 OR CYGWIN )
+# Create a response file in case the number of inputs exceeds command-line
+# character limits on certain platforms.
+file( TO_CMAKE_PATH ${LIBCLC
@@ -1431,9 +1431,13 @@ Value *ScalarExprEmitter::EmitScalarCast(Value *Src,
QualType SrcType,
return Builder.CreateFPToUI(Src, DstTy, "conv");
}
- if (DstElementTy->getTypeID() < SrcElementTy->getTypeID())
+ if ((DstElementTy->is16bitFPTy() && SrcElementTy->is16bitFPT
https://github.com/arsenm approved this pull request.
LGTM. Would be good to verify the vector case is "correct" in as far as it's
what GCC does
https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
h
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
https://github.com/arsenm approved this pull request.
I think this works for the test. I'm slightly confused by the PR not-stacking
with the test changes on top of the other PR
https://github.com/llvm/llvm-project/pull/89687
___
cfe-commits mailing l
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -4822,6 +4822,111 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
return RetBB;
}
+static MachineBasicBlock *lowerPseudoLaneOp(MachineInstr &MI,
arsenm wrote:
No, that's a generic pass. I would directly handle this in the legalizer, in
S
Author: Matt Arsenault
Date: 2024-04-22T11:35:09+02:00
New Revision: bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc
URL:
https://github.com/llvm/llvm-project/commit/bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc
DIFF:
https://github.com/llvm/llvm-project/commit/bd84f5d5d71ee26d9552a9cd96ef058cfb8a39fc.diff
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
arsenm wrote:
For AMDGPU 64 is probably right
https://github.com/llvm/llvm-project/pull/89446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
In a separate patch should have AMDGPUInstCombineIntrinsic try to fold bitcasts
into the intrinsic
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://li
@@ -18410,6 +18410,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_readlane:
+ case AMDGPU::BI__b
@@ -18410,6 +18410,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_readlane:
+ case AMDGPU::BI__b
@@ -4822,6 +4822,111 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
return RetBB;
}
+static MachineBasicBlock *lowerPseudoLaneOp(MachineInstr &MI,
arsenm wrote:
You should try to do this before selection. Doing it after just adds a lot of
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -271,30 +271,32 @@
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
// RUN: %clang -### -funsafe-math-optimizations -fno-reciprocal-math -c %s \
-// RUN: 2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: 2>&1 | FileCheck --check-prefix=CH
@@ -318,12 +320,12 @@
// RUN: %clang -### -fassociative-math -freciprocal-math -fno-signed-zeros \
// RUN: -fno-trapping-math -ftrapping-math -c %s 2>&1 \
-// RUN: | FileCheck --check-prefix=CHECK-NO-REASSOC-NO-UNSAFE-MATH %s
+// RUN: | FileCheck --check-prefix=CHECK-N
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89473
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> But In some target, it supply a HW instruction to complete the process
> (fp16->float32->bf16) . so it just supply a intrinsic (fp16 -> bf16)
Which is not a bitcast. The correct IR representation of this conversion is
fpext+fptrunc
https://github.com/llvm/llvm-project/pull/89
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89147
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -39,6 +39,10 @@ function(compile_to_bc)
set( TARGET_ARG "-target" ${ARG_TRIPLE} )
endif()
+ # Ensure the directory we are told to output to exists
+ get_filename_component( ARG_OUTPUT_DIR ${ARG_OUTPUT} DIRECTORY )
arsenm wrote:
I thought there was
@@ -0,0 +1,109 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm requested changes to this pull request.
Bitcast is not the correct behavior
https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listin
@@ -0,0 +1,109 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
You can have different target coexist in the same test files when appropriate
and just multi-list REQUIRES. Clang does this regularly (OpenMP in parti
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
This just requires additional REQUIRES support in lit for the built targets
when running the test, the same as other built-backend dependent codegen t
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/87989
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
The target absolutely should be explicit in any testing files. Pretending these
tests can be generic is going to be an intractable problem
https://g
arsenm wrote:
> This appears to just assert today, but interpreting this as bitcast doesn't
> make sense. I would expect this to emit a pair of casts, fpext to float, and
> fptrunc down to half
If we don't just reject it as an invalid cast
https://github.com/llvm/llvm-project/pull/89051
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89051
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,14 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16
-S -emit-llvm %s -o - | FileCheck %s
+// CHECK-LABEL: define dso_local half @test_
https://github.com/arsenm commented:
This appears to just assert today, but interpreting this as bitcast doesn't
make sense. I would expect this to emit a pair of casts, fpext to float, and
fptrunc down to half
https://github.com/llvm/llvm-project/pull/89051
__
@@ -115,7 +115,13 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- llvm_unreachable("AMDGPU does not support varargs");
+ const bo
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1056 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,43 @@
+//===- ExpandVariadics.h - expand variadic functions *- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -115,7 +115,13 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- llvm_unreachable("AMDGPU does not support varargs");
+ const bo
@@ -154,11 +154,20 @@ llvm::Value
*CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
llvm::Value *Ptr,
CharUnits Align) {
// OverflowArgArea = (OverflowArgArea
@@ -154,11 +154,20 @@ llvm::Value
*CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
llvm::Value *Ptr,
CharUnits Align) {
// OverflowArgArea = (OverflowArgArea
501 - 600 of 1415 matches
Mail list logo