https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/195813
>From e7a6a39f855708c339dfbae95d97879048bccd18 Mon Sep 17 00:00:00 2001 From: skc7 <[email protected]> Date: Tue, 5 May 2026 14:58:41 +0530 Subject: [PATCH 1/3] [CIR] Implement function target/tune attrs and FMV metadata. --- clang/include/clang/CIR/MissingFeatures.h | 1 - clang/lib/CIR/CodeGen/CIRGenModule.cpp | 69 ++++- clang/test/CIR/CodeGen/attr-target-aarch64.c | 244 ++++++++++++++++++ clang/test/CIR/CodeGen/attr-target-x86.c | 186 +++++++++++++ .../CIR/CodeGenHIP/attr-target-amdgpu.hip | 74 ++++++ 5 files changed, 570 insertions(+), 4 deletions(-) create mode 100644 clang/test/CIR/CodeGen/attr-target-aarch64.c create mode 100644 clang/test/CIR/CodeGen/attr-target-x86.c create mode 100644 clang/test/CIR/CodeGenHIP/attr-target-amdgpu.hip diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index ba5c2bf786a99..6b231ec72a806 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -78,7 +78,6 @@ struct MissingFeatures { static bool opFuncMaybeHandleStaticInExternC() { return false; } static bool opFuncMinSizeAttr() { return false; } static bool opFuncMultipleReturnVals() { return false; } - static bool opFuncMultiVersioning() { return false; } static bool opFuncNakedAttr() { return false; } static bool opFuncNoDuplicateAttr() { return false; } static bool opFuncOpenCLKernelMetadata() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 04e413aa916ec..b051f59e551e3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -779,7 +779,46 @@ bool CIRGenModule::getCPUAndFeaturesAttributes( const auto *tc = fd ? fd->getAttr<TargetClonesAttr>() : nullptr; bool addedAttr = false; if (td || tv || sd || tc) { - assert(!cir::MissingFeatures::opFuncMultiVersioning()); + llvm::StringMap<bool> featureMap; + astContext.getFunctionFeatureMap(featureMap, gd); + + // Now add the target-cpu and target-features to the function. + // While we populated the feature map above, we still need to + // get and parse the target/target_clones attribute so we can + // get the cpu for the function. + llvm::StringRef featureStr = td ? td->getFeaturesStr() : llvm::StringRef(); + if (tc && (getTriple().isOSAIX() || getTriple().isX86())) + featureStr = tc->getFeatureStr(gd.getMultiVersionIndex()); + if (!featureStr.empty()) { + clang::ParsedTargetAttr parsedAttr = + getTarget().parseTargetAttr(featureStr); + if (!parsedAttr.CPU.empty() && + getTarget().isValidCPUName(parsedAttr.CPU)) { + targetCPU = parsedAttr.CPU; + tuneCPU = ""; // Clear the tune CPU. + } + if (!parsedAttr.Tune.empty() && + getTarget().isValidCPUName(parsedAttr.Tune)) + tuneCPU = parsedAttr.Tune; + } + + if (sd) { + // Apply the given CPU name as the 'tune-cpu' so that the optimizer can + // favor this processor. + tuneCPU = sd->getCPUName(gd.getMultiVersionIndex())->getName(); + } + + // For AMDGPU, only emit delta features (features that differ from the + // target CPU's defaults). Other targets might want to follow a similar + // pattern. + if (getTarget().getTriple().isAMDGPU()) { + features = getFeatureDeltaFromDefault(*this, targetCPU, featureMap); + } else { + // Produce the canonical string for this set of features. + for (const auto &entry : featureMap) + features.push_back((entry.getValue() ? "+" : "-") + + entry.getKey().str()); + } } else { // Just add the existing target cpu and target features to the function. if (setTargetFeatures && getTarget().getTriple().isAMDGPU()) { @@ -814,8 +853,32 @@ bool CIRGenModule::getCPUAndFeaturesAttributes( attrs["cir.target-features"] = llvm::join(features, ","); addedAttr = true; } - // TODO(cir): add metadata for AArch64 Function Multi Versioning. - assert(!cir::MissingFeatures::opFuncMultiVersioning()); + // Add metadata for AArch64 Function Multi Versioning. An empty string value + // for "cir.fmv-features" represents the default version (matches OGCG's + // value-less LLVM attribute). + if (getTarget().getTriple().isAArch64()) { + llvm::SmallVector<llvm::StringRef, 8> feats; + bool isDefault = false; + if (tv) { + isDefault = tv->isDefaultVersion(); + tv->getFeatures(feats); + } else if (tc) { + isDefault = tc->isDefaultVersion(gd.getMultiVersionIndex()); + tc->getFeatures(feats, gd.getMultiVersionIndex()); + } + if (isDefault) { + attrs["cir.fmv-features"] = ""; + addedAttr = true; + } else if (!feats.empty()) { + // Sort features and remove duplicates. + std::set<llvm::StringRef> orderedFeats(feats.begin(), feats.end()); + std::string fmvFeatures; + for (llvm::StringRef f : orderedFeats) + fmvFeatures.append("," + f.str()); + attrs["cir.fmv-features"] = fmvFeatures.substr(1); + addedAttr = true; + } + } return addedAttr; } diff --git a/clang/test/CIR/CodeGen/attr-target-aarch64.c b/clang/test/CIR/CodeGen/attr-target-aarch64.c new file mode 100644 index 0000000000000..769c11e9ab688 --- /dev/null +++ b/clang/test/CIR/CodeGen/attr-target-aarch64.c @@ -0,0 +1,244 @@ +// RUN: %clang_cc1 -triple aarch64 -fclangir -emit-cir %s -o - \ +// RUN: | FileCheck --check-prefix=CIR %s +// RUN: %clang_cc1 -triple aarch64 -fclangir -emit-llvm %s -o - \ +// RUN: | FileCheck --check-prefix=LLVM %s +// RUN: %clang_cc1 -triple aarch64 -emit-llvm %s -o - \ +// RUN: | FileCheck --check-prefix=LLVM %s + +__attribute__((target("arch=armv8.2-a"))) +void v82(void) {} + +// CIR: cir.func{{.*}} @v82() +// CIR-SAME: "cir.target-features" = "+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" +// CIR-NOT: "cir.target-cpu" +// LLVM-DAG: define{{.*}} void @v82(){{.*}} #[[ATTR_V82:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_V82]] = {{.*}}"target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" + +// target("arch=armv8.2-a+sve"): arch with SVE extension. +__attribute__((target("arch=armv8.2-a+sve"))) +void v82sve(void) {} + +// CIR: cir.func{{.*}} @v82sve() +// CIR-SAME: "cir.target-features" = "+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" +// LLVM-DAG: define{{.*}} void @v82sve(){{.*}} #[[ATTR_V82SVE:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_V82SVE]] = {{.*}}"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" + +// target("arch=armv8.2-a+sve2"): arch + sve2 implies +sve. +__attribute__((target("arch=armv8.2-a+sve2"))) +void v82sve2(void) {} + +// CIR: cir.func{{.*}} @v82sve2() +// CIR-SAME: "cir.target-features" = "+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" +// LLVM-DAG: define{{.*}} void @v82sve2(){{.*}} #[[ATTR_V82SVE2:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_V82SVE2]] = {{.*}}"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" + +// target("arch=armv8.2-a+sve+sve2"): same effective feature set as v82sve2; +// reuses ATTR_V82SVE2. +__attribute__((target("arch=armv8.2-a+sve+sve2"))) +void v82svesve2(void) {} + +// LLVM-DAG: define{{.*}} void @v82svesve2(){{.*}} #[[ATTR_V82SVE2]] + +// target("arch=armv8.6-a+sve2"): later baseline + sve2. +__attribute__((target("arch=armv8.6-a+sve2"))) +void v86sve2(void) {} + +// CIR: cir.func{{.*}} @v86sve2() +// CIR-SAME: "cir.target-features" = "+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" +// LLVM-DAG: define{{.*}} void @v86sve2(){{.*}} #[[ATTR_V86SVE2:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_V86SVE2]] = {{.*}}"target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" + +// target("cpu=cortex-a710"): cpu override pulls in cortex-a710's baseline. +__attribute__((target("cpu=cortex-a710"))) +void a710(void) {} + +// CIR: cir.func{{.*}} @a710() +// CIR-SAME: "cir.target-cpu" = "cortex-a710" +// CIR-SAME: "cir.target-features" = "+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" +// LLVM-DAG: define{{.*}} void @a710(){{.*}} #[[ATTR_A710:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_A710]] = {{.*}}"target-cpu"="cortex-a710"{{.*}}"target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" + +// target("tune=cortex-a710"): only tune-cpu changes; no target-cpu, no +// target-features (global features unchanged from base aarch64 defaults). +__attribute__((target("tune=cortex-a710"))) +void tunea710(void) {} + +// CIR: cir.func{{.*}} @tunea710() +// CIR-SAME: "cir.tune-cpu" = "cortex-a710" +// CIR-NOT: "cir.target-cpu" +// CIR-NOT: "cir.target-features" +// LLVM-DAG: define{{.*}} void @tunea710(){{.*}} #[[ATTR_TUNEA710:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_TUNEA710]] = {{.*}}"tune-cpu"="cortex-a710" + +// target("cpu=generic"): generic cpu pulls in its small feature set. +__attribute__((target("cpu=generic"))) +void generic(void) {} + +// CIR: cir.func{{.*}} @generic() +// CIR-SAME: "cir.target-cpu" = "generic" +// CIR-SAME: "cir.target-features" = "+ete,+fp-armv8,+neon,+trbe,+v8a" +// LLVM-DAG: define{{.*}} void @generic(){{.*}} #[[ATTR_GENERIC:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_GENERIC]] = {{.*}}"target-cpu"="generic"{{.*}}"target-features"="+ete,+fp-armv8,+neon,+trbe,+v8a" + +// target("tune=generic"): only tune-cpu set. +__attribute__((target("tune=generic"))) +void tune(void) {} + +// CIR: cir.func{{.*}} @tune() +// CIR-SAME: "cir.tune-cpu" = "generic" +// CIR-NOT: "cir.target-cpu" +// CIR-NOT: "cir.target-features" +// LLVM-DAG: define{{.*}} void @tune(){{.*}} #[[ATTR_TUNE:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_TUNE]] = {{.*}}"tune-cpu"="generic" + +// target("cpu=neoverse-n1,tune=cortex-a710"): both cpu and tune set. +__attribute__((target("cpu=neoverse-n1,tune=cortex-a710"))) +void n1tunea710(void) {} + +// CIR: cir.func{{.*}} @n1tunea710() +// CIR-SAME: "cir.target-cpu" = "neoverse-n1" +// CIR-SAME: "cir.target-features" = "+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a" +// CIR-SAME: "cir.tune-cpu" = "cortex-a710" +// LLVM-DAG: define{{.*}} void @n1tunea710(){{.*}} #[[ATTR_N1TUNE:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_N1TUNE]] = {{.*}}"target-cpu"="neoverse-n1"{{.*}}"target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a"{{.*}}"tune-cpu"="cortex-a710" + +// target("sve,tune=cortex-a710"): feature add + tune. No cpu override. +__attribute__((target("sve,tune=cortex-a710"))) +void svetunea710(void) {} + +// CIR: cir.func{{.*}} @svetunea710() +// CIR-SAME: "cir.target-features" = "+fp-armv8,+fullfp16,+sve" +// CIR-SAME: "cir.tune-cpu" = "cortex-a710" +// CIR-NOT: "cir.target-cpu" +// LLVM-DAG: define{{.*}} void @svetunea710(){{.*}} #[[ATTR_SVETUNE:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_SVETUNE]] = {{.*}}"target-features"="+fp-armv8,+fullfp16,+sve"{{.*}}"tune-cpu"="cortex-a710" + +// target("+sve,tune=cortex-a710"): explicit "+" prefix; same effect; reuses +// ATTR_SVETUNE. +__attribute__((target("+sve,tune=cortex-a710"))) +void plussvetunea710(void) {} + +// LLVM-DAG: define{{.*}} void @plussvetunea710(){{.*}} #[[ATTR_SVETUNE]] + +// target("cpu=neoverse-v1,+sve2"): cpu + extra feature. +__attribute__((target("cpu=neoverse-v1,+sve2"))) +void v1plussve2(void) {} + +// CIR: cir.func{{.*}} @v1plussve2() +// CIR-SAME: "cir.target-cpu" = "neoverse-v1" +// CIR-SAME: "cir.target-features" = "+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" +// LLVM-DAG: define{{.*}} void @v1plussve2(){{.*}} #[[ATTR_V1SVE2:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_V1SVE2]] = {{.*}}"target-cpu"="neoverse-v1"{{.*}}"target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" + +// target("cpu=neoverse-v1+sve2"): cpu+feature without comma; same effect; +// reuses ATTR_V1SVE2. +__attribute__((target("cpu=neoverse-v1+sve2"))) +void v1sve2(void) {} + +// LLVM-DAG: define{{.*}} void @v1sve2(){{.*}} #[[ATTR_V1SVE2]] + +// target("cpu=neoverse-v1,+nosve"): cpu + feature negation via "+no" prefix. +__attribute__((target("cpu=neoverse-v1,+nosve"))) +void v1minussve(void) {} + +// CIR: cir.func{{.*}} @v1minussve() +// CIR-SAME: "cir.target-cpu" = "neoverse-v1" +// CIR-SAME: "cir.target-features" = "+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" +// LLVM-DAG: define{{.*}} void @v1minussve(){{.*}} #[[ATTR_V1NOSVE:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_V1NOSVE]] = {{.*}}"target-cpu"="neoverse-v1"{{.*}}"target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" + +// target("cpu=neoverse-v1,no-sve"): "no-" prefix; same effect; reuses +// ATTR_V1NOSVE. +__attribute__((target("cpu=neoverse-v1,no-sve"))) +void v1nosve(void) {} + +// LLVM-DAG: define{{.*}} void @v1nosve(){{.*}} #[[ATTR_V1NOSVE]] + +// target("cpu=neoverse-v1+nosve"): cpu+nosve without comma; same effect; +// reuses ATTR_V1NOSVE. +__attribute__((target("cpu=neoverse-v1+nosve"))) +void v1msve(void) {} + +// LLVM-DAG: define{{.*}} void @v1msve(){{.*}} #[[ATTR_V1NOSVE]] + +// target("+sve"): single feature add; no cpu/tune override. +__attribute__((target("+sve"))) +void plussve(void) {} + +// CIR: cir.func{{.*}} @plussve() +// CIR-SAME: "cir.target-features" = "+fp-armv8,+fullfp16,+sve" +// CIR-NOT: "cir.target-cpu" +// LLVM-DAG: define{{.*}} void @plussve(){{.*}} #[[ATTR_PLUSSVE:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_PLUSSVE]] = {{.*}}"target-features"="+fp-armv8,+fullfp16,+sve" + +// target("+sve+nosve2"): chained features; nosve2 is a no-op; reuses +// ATTR_PLUSSVE. +__attribute__((target("+sve+nosve2"))) +void plussveplussve2(void) {} + +// LLVM-DAG: define{{.*}} void @plussveplussve2(){{.*}} #[[ATTR_PLUSSVE]] + +// target("sve,no-sve2"): comma-separated equivalent; same effect; reuses +// ATTR_PLUSSVE. +__attribute__((target("sve,no-sve2"))) +void plussveminusnosve2(void) {} + +// LLVM-DAG: define{{.*}} void @plussveminusnosve2(){{.*}} #[[ATTR_PLUSSVE]] + +// target("+fp16"): just adds fp16. +__attribute__((target("+fp16"))) +void plusfp16(void) {} + +// CIR: cir.func{{.*}} @plusfp16() +// CIR-SAME: "cir.target-features" = "+fp-armv8,+fullfp16" +// CIR-NOT: "cir.target-cpu" +// LLVM-DAG: define{{.*}} void @plusfp16(){{.*}} #[[ATTR_FP16:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_FP16]] = {{.*}}"target-features"="+fp-armv8,+fullfp16" + +// target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2"): everything +// at once. arch overrides do NOT clear cpu= here. +__attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2"))) +void all(void) {} + +// CIR: cir.func{{.*}} @all() +// CIR-SAME: "cir.target-cpu" = "neoverse-n1" +// CIR-SAME: "cir.target-features" = "+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" +// CIR-SAME: "cir.tune-cpu" = "cortex-a710" +// LLVM-DAG: define{{.*}} void @all(){{.*}} #[[ATTR_ALL:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_ALL]] = {{.*}}"target-cpu"="neoverse-n1"{{.*}}"target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a"{{.*}}"tune-cpu"="cortex-a710" + +// target("+nosimd"): "+no" prefix for simd; produces no target-features (the +// negation cancels the default neon, leaving an empty effective delta). +__attribute__((target("+nosimd"))) +void plusnosimd(void) {} + +// CIR: cir.func{{.*}} @plusnosimd() +// CIR-NOT: "cir.target-cpu" +// CIR-NOT: "cir.target-features" +// LLVM-DAG: define{{.*}} void @plusnosimd(){{.*}} #[[ATTR_NOSIMD:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_NOSIMD]] = { {{.*}} } + +// target("no-simd"): equivalent "no-" syntax; reuses ATTR_NOSIMD. +__attribute__((target("no-simd"))) +void nosimd(void) {} + +// LLVM-DAG: define{{.*}} void @nosimd(){{.*}} #[[ATTR_NOSIMD]] + +// target("no-v9.3a"): disable an arch-level feature without enabling anything. +__attribute__((target("no-v9.3a"))) +void minusarch(void) {} + +// CIR: cir.func{{.*}} @minusarch() +// CIR-SAME: "cir.target-features" = "-v9.3a" +// LLVM-DAG: define{{.*}} void @minusarch(){{.*}} #[[ATTR_MINUSARCH:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_MINUSARCH]] = {{.*}}"target-features"="-v9.3a" + +// target("cpu=apple-m4"): another cpu with a large feature set. +__attribute__((target("cpu=apple-m4"))) +void applem4(void) {} + +// CIR: cir.func{{.*}} @applem4() +// CIR-SAME: "cir.target-cpu" = "apple-m4" +// CIR-SAME: "cir.target-features" = "+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" +// LLVM-DAG: define{{.*}} void @applem4(){{.*}} #[[ATTR_APPLEM4:[0-9]+]] +// LLVM-DAG: attributes #[[ATTR_APPLEM4]] = {{.*}}"target-cpu"="apple-m4"{{.*}}"target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" diff --git a/clang/test/CIR/CodeGen/attr-target-x86.c b/clang/test/CIR/CodeGen/attr-target-x86.c new file mode 100644 index 0000000000000..7de15ec447abe --- /dev/null +++ b/clang/test/CIR/CodeGen/attr-target-x86.c @@ -0,0 +1,186 @@ +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 \ +// RUN: -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR + +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 \ +// RUN: -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 \ +// RUN: -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +// LLVM: define {{.*}}@f_default({{.*}} [[f_default:#[0-9]+]] +// LLVM: define {{.*}}@f_avx_sse4_2_ivybridge({{.*}} [[f_avx_sse4_2_ivybridge:#[0-9]+]] +// LLVM: define {{.*}}@f_fpmath_387({{.*}} [[f_default]] +// LLVM: define {{.*}}@f_no_sse2({{.*}} [[f_no_sse2:#[0-9]+]] +// LLVM: define {{.*}}@f_sse4({{.*}} [[f_sse4:#[0-9]+]] +// LLVM: define {{.*}}@f_no_sse4({{.*}} [[f_no_sse4:#[0-9]+]] +// LLVM: define {{.*}}@f_default2({{.*}} [[f_default]] +// LLVM: define {{.*}}@f_avx_sse4_2_ivybridge_2({{.*}} [[f_avx_sse4_2_ivybridge]] +// LLVM: define {{.*}}@f_no_aes_ivybridge({{.*}} [[f_no_aes_ivybridge:#[0-9]+]] +// LLVM: define {{.*}}@f_no_mmx({{.*}} [[f_no_mmx:#[0-9]+]] +// LLVM: define {{.*}}@f_lakemont_mmx({{.*}} [[f_lakemont_mmx:#[0-9]+]] +// LLVM: define {{.*}}@f_use_before_def({{.*}} [[f_lakemont_mmx]] +// LLVM: define {{.*}}@f_tune_sandybridge({{.*}} [[f_tune_sandybridge:#[0-9]+]] +// LLVM: define {{.*}}@f_x86_64_v2({{.*}} [[f_x86_64_v2:#[0-9]+]] +// LLVM: define {{.*}}@f_x86_64_v3({{.*}} [[f_x86_64_v3:#[0-9]+]] +// LLVM: define {{.*}}@f_x86_64_v4({{.*}} [[f_x86_64_v4:#[0-9]+]] +// LLVM: define {{.*}}@f_avx10_1{{.*}} [[f_avx10_1:#[0-9]+]] +// LLVM: define {{.*}}@f_prefer_256_bit({{.*}} [[f_prefer_256_bit:#[0-9]+]] +// LLVM: define {{.*}}@f_no_prefer_256_bit({{.*}} [[f_no_prefer_256_bit:#[0-9]+]] + +// CIR: cir.func{{.*}} @f_default() +// CIR-SAME: "cir.target-cpu" = "i686" +// CIR-SAME: "cir.target-features" = "+cmov,+cx8,+x87" +// CIR-SAME: "cir.tune-cpu" = "i686" + +// LLVM: [[f_default]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686" +void f_default(void) {} + +// CIR: cir.func{{.*}} @f_avx_sse4_2_ivybridge() +// CIR-SAME: "cir.target-cpu" = "ivybridge" +// CIR-SAME: "cir.target-features" = "+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" +// CIR-NOT: "cir.tune-cpu" + +// LLVM: [[f_avx_sse4_2_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" +__attribute__((target("avx,sse4.2,arch=ivybridge"))) +void f_avx_sse4_2_ivybridge(void) {} + +// fpmath= is currently ignored, so f_fpmath_387 has identical attributes to +// f_default. +// CIR: cir.func{{.*}} @f_fpmath_387() +__attribute__((target("fpmath=387"))) +void f_fpmath_387(void) {} + +// CIR: cir.func{{.*}} @f_no_sse2() +// CIR-SAME: "cir.target-cpu" = "i686" +// CIR-SAME: "cir.target-features" = "+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" +// CIR-SAME: "cir.tune-cpu" = "i686" + +// LLVM-NOT: tune-cpu +// LLVM: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +__attribute__((target("no-sse2"))) +void f_no_sse2(void) {} + +// CIR: cir.func{{.*}} @f_sse4() +// CIR-SAME: "cir.target-cpu" = "i686" +// CIR-SAME: "cir.target-features" = "+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +// CIR-SAME: "cir.tune-cpu" = "i686" + +// LLVM: [[f_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" +__attribute__((target("sse4"))) +void f_sse4(void) {} + + +// CIR: cir.func{{.*}} @f_no_sse4() +// CIR-SAME: "cir.target-cpu" = "i686" +// CIR-SAME: "cir.target-features" = "+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" +// CIR-SAME: "cir.tune-cpu" = "i686" + +// LLVM: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +__attribute__((target("no-sse4"))) +void f_no_sse4(void) {} + +// f_default2: no per-function attribute, identical attributes to f_default +// (checked above). +void f_default2(void) { + f_avx_sse4_2_ivybridge(); + return f_default(); +} + +// f_avx_sse4_2_ivybridge_2: same attributes as f_avx_sse4_2_ivybridge despite +// the extra whitespace in the target string. +__attribute__((target("avx, sse4.2, arch= ivybridge"))) +void f_avx_sse4_2_ivybridge_2(void) {} + +// CIR: cir.func{{.*}} @f_no_aes_ivybridge() +// CIR-SAME: "cir.target-cpu" = "ivybridge" +// CIR-SAME: "cir.target-features" = "+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" +// CIR-NOT: "cir.tune-cpu" + +// LLVM: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" +__attribute__((target("no-aes, arch=ivybridge"))) +void f_no_aes_ivybridge(void) {} + +// CIR: cir.func{{.*}} @f_no_mmx() +// CIR-SAME: "cir.target-cpu" = "i686" +// CIR-SAME: "cir.target-features" = "+cmov,+cx8,+x87,-mmx" +// CIR-SAME: "cir.tune-cpu" = "i686" + +// LLVM: [[f_no_mmx]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"{{.*}}"tune-cpu"="i686" +__attribute__((target("no-mmx"))) +void f_no_mmx(void) {} + +// CIR: cir.func{{.*}} @f_lakemont_mmx() +// CIR-SAME: "cir.target-cpu" = "lakemont" +// CIR-SAME: "cir.target-features" = "+cx8,+mmx" + +// LLVM: [[f_lakemont_mmx]] = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" +__attribute__((target("arch=lakemont,mmx"))) +void f_lakemont_mmx(void) {} + +void f_use_before_def(void); +void usage(void){ + f_use_before_def(); +} + +// f_use_before_def: same attributes as f_lakemont_mmx (checked above) - the +// definition's attribute should be propagated to the earlier declaration. +__attribute__((target("arch=lakemont,mmx"))) +void f_use_before_def(void) {} + +// CIR: cir.func{{.*}} @f_tune_sandybridge() +// CIR-SAME: "cir.target-cpu" = "i686" +// CIR-SAME: "cir.target-features" = "+cmov,+cx8,+x87" +// CIR-SAME: "cir.tune-cpu" = "sandybridge" + +// LLVM: [[f_tune_sandybridge]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge" +__attribute__((target("tune=sandybridge"))) +void f_tune_sandybridge(void) {} + +// CIR: cir.func{{.*}} @f_x86_64_v2() +// CIR-SAME: "cir.target-cpu" = "x86-64-v2" +// CIR-SAME: "cir.target-features" = "+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" + +// LLVM: [[f_x86_64_v2]] ={{.*}}"target-cpu"="x86-64-v2" +// LLVM-SAME: "target-features"="+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +__attribute__((target("arch=x86-64-v2"))) +void f_x86_64_v2(void) {} + +// CIR: cir.func{{.*}} @f_x86_64_v3() +// CIR-SAME: "cir.target-cpu" = "x86-64-v3" +// CIR-SAME: "cir.target-features" = "+avx,+avx2,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" + +// LLVM: [[f_x86_64_v3]] = {{.*}}"target-cpu"="x86-64-v3" +// LLVM-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +__attribute__((target("arch=x86-64-v3"))) +void f_x86_64_v3(void) {} + +// CIR: cir.func{{.*}} @f_x86_64_v4() +// CIR-SAME: "cir.target-cpu" = "x86-64-v4" +// CIR-SAME: "cir.target-features" = "+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" + +// LLVM: [[f_x86_64_v4]] = {{.*}}"target-cpu"="x86-64-v4" +// LLVM-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +__attribute__((target("arch=x86-64-v4"))) +void f_x86_64_v4(void) {} + +// CIR: cir.func{{.*}} @f_avx10_1() +// CIR-SAME: "cir.target-cpu" = "i686" +// CIR-SAME: "cir.target-features" = "+avx,+avx10.1,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" + +// LLVM: [[f_avx10_1]] = {{.*}}"target-cpu"="i686" "target-features"="+avx,+avx10.1,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +__attribute__((target("avx10.1"))) +void f_avx10_1(void) {} + +// CIR: cir.func{{.*}} @f_prefer_256_bit() +// CIR-SAME: "cir.target-features" = "{{.*}}+prefer-256-bit{{.*}}" + +// LLVM: [[f_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}+prefer-256-bit +__attribute__((target("prefer-256-bit"))) +void f_prefer_256_bit(void) {} + +// CIR: cir.func{{.*}} @f_no_prefer_256_bit() +// CIR-SAME: "cir.target-features" = "{{.*}}-prefer-256-bit{{.*}}" + +// LLVM: [[f_no_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}-prefer-256-bit +__attribute__((target("no-prefer-256-bit"))) +void f_no_prefer_256_bit(void) {} diff --git a/clang/test/CIR/CodeGenHIP/attr-target-amdgpu.hip b/clang/test/CIR/CodeGenHIP/attr-target-amdgpu.hip new file mode 100644 index 0000000000000..b5b75752177b6 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/attr-target-amdgpu.hip @@ -0,0 +1,74 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target + +// Default behavior for gfx90a: test_default has no target-features, +// test_explicit_attr has only the delta (+gfx11-insts). + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -target-cpu gfx90a -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-GFX90A %s --input-file=%t.cir + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -target-cpu gfx90a -emit-llvm %s -o %t.cir.ll +// RUN: FileCheck --check-prefix=LLVM-GFX90A %s --input-file=%t.cir.ll + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \ +// RUN: -fcuda-is-device -target-cpu gfx90a -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-GFX90A %s --input-file=%t.ll + +// With -target-feature, both functions get the delta feature. +// gfx1030 defaults to wavefrontsize32, so +wavefrontsize64 is a delta. + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -target-cpu gfx1030 -target-feature +wavefrontsize64 \ +// RUN: -emit-cir %s -o %t-cmdline.cir +// RUN: FileCheck --check-prefix=CIR-CMDLINE %s --input-file=%t-cmdline.cir + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -target-cpu gfx1030 -target-feature +wavefrontsize64 \ +// RUN: -emit-llvm %s -o %t-cmdline.cir.ll +// RUN: FileCheck --check-prefix=LLVM-CMDLINE %s --input-file=%t-cmdline.cir.ll + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \ +// RUN: -fcuda-is-device -target-cpu gfx1030 -target-feature +wavefrontsize64 \ +// RUN: -emit-llvm %s -o %t-cmdline.ll +// RUN: FileCheck --check-prefix=LLVM-CMDLINE %s --input-file=%t-cmdline.ll + +// gfx90a + no extra features: test_default has only target-cpu, no +// target-features. test_explicit_attr has cpu + only the per-function delta. + +// CIR-GFX90A: cir.func{{.*}} @_Z12test_defaultv() +// CIR-GFX90A-SAME: "cir.target-cpu" = "gfx90a" +// CIR-GFX90A-NOT: "cir.target-features" + +// CIR-GFX90A: cir.func{{.*}} @_Z18test_explicit_attrv() +// CIR-GFX90A-SAME: "cir.target-cpu" = "gfx90a" +// CIR-GFX90A-SAME: "cir.target-features" = "+gfx11-insts" + +// LLVM-GFX90A: define{{.*}} @_Z12test_defaultv(){{.*}} #[[ATTR_DEFAULT_90A:[0-9]+]] +// LLVM-GFX90A: define{{.*}} @_Z18test_explicit_attrv(){{.*}} #[[ATTR_EXPLICIT_90A:[0-9]+]] +// LLVM-GFX90A-DAG: attributes #[[ATTR_DEFAULT_90A]] = {{.*}}"target-cpu"="gfx90a" +// LLVM-GFX90A-NOT: attributes #[[ATTR_DEFAULT_90A]] = {{.*}}"target-features" +// LLVM-GFX90A-DAG: attributes #[[ATTR_EXPLICIT_90A]] = {{.*}}"target-cpu"="gfx90a"{{.*}}"target-features"="+gfx11-insts" + +// gfx1030 + cmdline +wavefrontsize64: test_default gets the cmdline delta; +// test_explicit_attr gets both the per-function and cmdline deltas. + +// CIR-CMDLINE: cir.func{{.*}} @_Z12test_defaultv() +// CIR-CMDLINE-SAME: "cir.target-cpu" = "gfx1030" +// CIR-CMDLINE-SAME: "cir.target-features" = "+wavefrontsize64" + +// CIR-CMDLINE: cir.func{{.*}} @_Z18test_explicit_attrv() +// CIR-CMDLINE-SAME: "cir.target-cpu" = "gfx1030" +// CIR-CMDLINE-SAME: "cir.target-features" = "{{[^"]*}}+gfx11-insts{{[^"]*}}+wavefrontsize64{{[^"]*}}" + +// LLVM-CMDLINE: define{{.*}} @_Z12test_defaultv(){{.*}} #[[ATTR_DEFAULT_1030:[0-9]+]] +// LLVM-CMDLINE: define{{.*}} @_Z18test_explicit_attrv(){{.*}} #[[ATTR_EXPLICIT_1030:[0-9]+]] +// LLVM-CMDLINE-DAG: attributes #[[ATTR_DEFAULT_1030]] = {{.*}}"target-cpu"="gfx1030"{{.*}}"target-features"="+wavefrontsize64" +// LLVM-CMDLINE-DAG: attributes #[[ATTR_EXPLICIT_1030]] = {{.*}}"target-cpu"="gfx1030"{{.*}}"target-features"="{{[^"]*}}+gfx11-insts{{[^"]*}}+wavefrontsize64{{[^"]*}}" + +__global__ void test_default() {} + +__attribute__((target("gfx11-insts"))) +__device__ void test_explicit_attr() {} >From 907d9c71ddeade04b6724041c2a22f6929b998f3 Mon Sep 17 00:00:00 2001 From: skc7 <[email protected]> Date: Wed, 6 May 2026 11:39:01 +0530 Subject: [PATCH 2/3] use interleaveComma and features.reserve --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 9 ++++-- clang/test/CIR/Lowering/fmv-features.cir | 35 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 clang/test/CIR/Lowering/fmv-features.cir diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index b051f59e551e3..8485770006949 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -34,8 +34,10 @@ #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/Interfaces/CIROpInterfaces.h" #include "clang/CIR/MissingFeatures.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" #include "CIRGenFunctionInfo.h" #include "TargetInfo.h" @@ -815,6 +817,7 @@ bool CIRGenModule::getCPUAndFeaturesAttributes( features = getFeatureDeltaFromDefault(*this, targetCPU, featureMap); } else { // Produce the canonical string for this set of features. + features.reserve(features.size() + featureMap.size()); for (const auto &entry : featureMap) features.push_back((entry.getValue() ? "+" : "-") + entry.getKey().str()); @@ -873,9 +876,9 @@ bool CIRGenModule::getCPUAndFeaturesAttributes( // Sort features and remove duplicates. std::set<llvm::StringRef> orderedFeats(feats.begin(), feats.end()); std::string fmvFeatures; - for (llvm::StringRef f : orderedFeats) - fmvFeatures.append("," + f.str()); - attrs["cir.fmv-features"] = fmvFeatures.substr(1); + llvm::raw_string_ostream os(fmvFeatures); + llvm::interleaveComma(orderedFeats, os); + attrs["cir.fmv-features"] = fmvFeatures; addedAttr = true; } } diff --git a/clang/test/CIR/Lowering/fmv-features.cir b/clang/test/CIR/Lowering/fmv-features.cir new file mode 100644 index 0000000000000..c3ff77292d096 --- /dev/null +++ b/clang/test/CIR/Lowering/fmv-features.cir @@ -0,0 +1,35 @@ +// Verify that the AArch64 Function Multi Versioning attribute "cir.fmv-features" +// is propagated end-to-end: + +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +module attributes {cir.triple = "aarch64-unknown-linux-gnu"} { + // Default version: empty fmv-features string (matches OGCG's value-less attr). + // MLIR: llvm.func @fmv_default() + // MLIR-SAME: "cir.fmv-features" = "" + // LLVM: define void @fmv_default(){{.*}} #[[ATTR_DEFAULT:[0-9]+]] + cir.func @fmv_default() attributes {"cir.fmv-features" = ""} { + cir.return + } + + // Single-feature version. + // MLIR: llvm.func @fmv_sve() + // MLIR-SAME: "cir.fmv-features" = "sve" + // LLVM: define void @fmv_sve(){{.*}} #[[ATTR_SVE:[0-9]+]] + cir.func @fmv_sve() attributes {"cir.fmv-features" = "sve"} { + cir.return + } + + // Multi-feature version: features are sorted and comma-separated. + // MLIR: llvm.func @fmv_aes_sve() + // MLIR-SAME: "cir.fmv-features" = "aes,sve" + // LLVM: define void @fmv_aes_sve(){{.*}} #[[ATTR_AES_SVE:[0-9]+]] + cir.func @fmv_aes_sve() attributes {"cir.fmv-features" = "aes,sve"} { + cir.return + } +} + +// LLVM-DAG: attributes #[[ATTR_DEFAULT]] = {{.*}}"fmv-features" +// LLVM-DAG: attributes #[[ATTR_SVE]] = {{.*}}"fmv-features"="sve" +// LLVM-DAG: attributes #[[ATTR_AES_SVE]] = {{.*}}"fmv-features"="aes,sve" >From b84d09ffd630294bd56103dfe1fc330e7d8b51ca Mon Sep 17 00:00:00 2001 From: skc7 <[email protected]> Date: Thu, 7 May 2026 11:16:22 +0530 Subject: [PATCH 3/3] ROllback to TODO for AArch64 Function Multi Versioning --- clang/include/clang/CIR/MissingFeatures.h | 1 + clang/lib/CIR/CodeGen/CIRGenModule.cpp | 28 ++---------------- clang/test/CIR/Lowering/fmv-features.cir | 35 ----------------------- 3 files changed, 3 insertions(+), 61 deletions(-) delete mode 100644 clang/test/CIR/Lowering/fmv-features.cir diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 6b231ec72a806..ba5c2bf786a99 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -78,6 +78,7 @@ struct MissingFeatures { static bool opFuncMaybeHandleStaticInExternC() { return false; } static bool opFuncMinSizeAttr() { return false; } static bool opFuncMultipleReturnVals() { return false; } + static bool opFuncMultiVersioning() { return false; } static bool opFuncNakedAttr() { return false; } static bool opFuncNoDuplicateAttr() { return false; } static bool opFuncOpenCLKernelMetadata() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 8485770006949..e4321f4f72ba4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -856,32 +856,8 @@ bool CIRGenModule::getCPUAndFeaturesAttributes( attrs["cir.target-features"] = llvm::join(features, ","); addedAttr = true; } - // Add metadata for AArch64 Function Multi Versioning. An empty string value - // for "cir.fmv-features" represents the default version (matches OGCG's - // value-less LLVM attribute). - if (getTarget().getTriple().isAArch64()) { - llvm::SmallVector<llvm::StringRef, 8> feats; - bool isDefault = false; - if (tv) { - isDefault = tv->isDefaultVersion(); - tv->getFeatures(feats); - } else if (tc) { - isDefault = tc->isDefaultVersion(gd.getMultiVersionIndex()); - tc->getFeatures(feats, gd.getMultiVersionIndex()); - } - if (isDefault) { - attrs["cir.fmv-features"] = ""; - addedAttr = true; - } else if (!feats.empty()) { - // Sort features and remove duplicates. - std::set<llvm::StringRef> orderedFeats(feats.begin(), feats.end()); - std::string fmvFeatures; - llvm::raw_string_ostream os(fmvFeatures); - llvm::interleaveComma(orderedFeats, os); - attrs["cir.fmv-features"] = fmvFeatures; - addedAttr = true; - } - } + // TODO(cir): add metadata for AArch64 Function Multi Versioning. + assert(!cir::MissingFeatures::opFuncMultiVersioning()); return addedAttr; } diff --git a/clang/test/CIR/Lowering/fmv-features.cir b/clang/test/CIR/Lowering/fmv-features.cir deleted file mode 100644 index c3ff77292d096..0000000000000 --- a/clang/test/CIR/Lowering/fmv-features.cir +++ /dev/null @@ -1,35 +0,0 @@ -// Verify that the AArch64 Function Multi Versioning attribute "cir.fmv-features" -// is propagated end-to-end: - -// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR -// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM - -module attributes {cir.triple = "aarch64-unknown-linux-gnu"} { - // Default version: empty fmv-features string (matches OGCG's value-less attr). - // MLIR: llvm.func @fmv_default() - // MLIR-SAME: "cir.fmv-features" = "" - // LLVM: define void @fmv_default(){{.*}} #[[ATTR_DEFAULT:[0-9]+]] - cir.func @fmv_default() attributes {"cir.fmv-features" = ""} { - cir.return - } - - // Single-feature version. - // MLIR: llvm.func @fmv_sve() - // MLIR-SAME: "cir.fmv-features" = "sve" - // LLVM: define void @fmv_sve(){{.*}} #[[ATTR_SVE:[0-9]+]] - cir.func @fmv_sve() attributes {"cir.fmv-features" = "sve"} { - cir.return - } - - // Multi-feature version: features are sorted and comma-separated. - // MLIR: llvm.func @fmv_aes_sve() - // MLIR-SAME: "cir.fmv-features" = "aes,sve" - // LLVM: define void @fmv_aes_sve(){{.*}} #[[ATTR_AES_SVE:[0-9]+]] - cir.func @fmv_aes_sve() attributes {"cir.fmv-features" = "aes,sve"} { - cir.return - } -} - -// LLVM-DAG: attributes #[[ATTR_DEFAULT]] = {{.*}}"fmv-features" -// LLVM-DAG: attributes #[[ATTR_SVE]] = {{.*}}"fmv-features"="sve" -// LLVM-DAG: attributes #[[ATTR_AES_SVE]] = {{.*}}"fmv-features"="aes,sve" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
