https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/176533
>From 3356df31e8c556b258aad87253cf8e95a33e7731 Mon Sep 17 00:00:00 2001 From: Shilei Tian <[email protected]> Date: Fri, 16 Jan 2026 22:39:13 -0500 Subject: [PATCH] [RFC][Clang][AMDGPU] Emit only delta target-features to reduce IR bloat Currently, AMDGPU functions have their `target-features` attribute populated with all default features for the target GPU. This is redundant because the backend can derive these defaults from the `target-cpu` attribute via `AMDGPUTargetMachine::getFeatureString()`. In this PR, for AMDGPU targets only: - Functions without explicit target attributes no longer emit `target-features` - Functions with `__attribute__((target(...)))` or `-target-feature` emit only features that differ from the target's defaults (delta) The backend already handles missing `target-features` correctly by falling back to the TargetMachine's defaults. A new cc1 flag `-famdgpu-emit-full-target-features` is added to emit full features when needed. Example: Before: ```llvm attributes #0 = { "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,..." 
} ``` After (default): ```llvm attributes #0 = { "target-cpu"="gfx90a" } ``` After (with explicit `+wavefrontsize32` override): ```llvm attributes #0 = { "target-cpu"="gfx90a" "target-features"="+wavefrontsize32" } ``` --- clang/include/clang/Basic/CodeGenOptions.def | 4 + clang/include/clang/Options/Options.td | 6 + clang/lib/CodeGen/CodeGenModule.cpp | 48 +++++++- clang/test/CodeGen/link-builtin-bitcode.c | 2 +- .../test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 4 +- .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 8 +- .../amdgpu-features-default-delta.cl | 70 ++++++++++++ clang/test/CodeGenOpenCL/amdgpu-features.cl | 106 +++++++++--------- ...eadonly-features-written-with-no-target.cl | 10 +- clang/test/OpenMP/amdgcn-attributes.cpp | 4 +- 10 files changed, 189 insertions(+), 73 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index baf8b093c10e6..ec3cf0b432143 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -470,6 +470,10 @@ CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1, Benign) /// Expands s_waitcnt instructions to help PC-sampling profilers identify stalls. CODEGENOPT(AMDGPUExpandWaitcntProfiling, 1, 0, Benign) +/// Emit full target-features attribute for AMDGPU (for testing). (AMDGPU Only) +/// By default, only features that differ from the target CPU's defaults are emitted. +CODEGENOPT(AMDGPUEmitFullTargetFeatures, 1, 0, Benign) + // Whether to emit Swift Async function extended frame information: auto, // never, always. 
ENUM_CODEGENOPT(SwiftAsyncFramePointer, SwiftAsyncFramePointerKind, 2, diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 188739e72434a..daecec88adcf2 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -5606,6 +5606,12 @@ defm amdgpu_expand_waitcnt_profiling : BoolMOption<"amdgpu-expand-waitcnt-profil "emits waitcnt(N-1), waitcnt(N-2), ..., waitcnt(target). (AMDGPU only)">, NegFlag<SetFalse, [], [ClangOption]>>; +def famdgpu_emit_full_target_features : Flag<["-"], "famdgpu-emit-full-target-features">, + Visibility<[CC1Option]>, + HelpText<"Emit full target-features attribute for AMDGPU functions instead of " + "only the delta from the target CPU's defaults. (AMDGPU only)">, + MarshallingInfoFlag<CodeGenOpts<"AMDGPUEmitFullTargetFeatures">>; + def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>, HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">, Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index dc8a31b7f7f0d..3532306554938 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2950,14 +2950,26 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr; const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr; bool AddedAttr = false; + auto HandleAMDGPUFeatureDelta = [&](llvm::StringMap<bool> &FeatureMap) { + // Get the default feature map for the (possibly overridden) target CPU. + llvm::StringMap<bool> DefaultFeatureMap; + getTarget().initFeatureMap(DefaultFeatureMap, getContext().getDiagnostics(), + TargetCPU, {}); + + // Only emit features that differ from the defaults. 
+ for (const auto &Entry : FeatureMap) { + auto DefaultIt = DefaultFeatureMap.find(Entry.getKey()); + // Emit if the feature is not in defaults or has a different value. + if (DefaultIt == DefaultFeatureMap.end() || + DefaultIt->getValue() != Entry.getValue()) + Features.push_back((Entry.getValue() ? "+" : "-") + + Entry.getKey().str()); + } + }; if (TD || TV || SD || TC) { llvm::StringMap<bool> FeatureMap; getContext().getFunctionFeatureMap(FeatureMap, GD); - // Produce the canonical string for this set of features. - for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap) - Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str()); - // Now add the target-cpu and target-features to the function. // While we populated the feature map above, we still need to // get and parse the target attribute so we can get the cpu for @@ -2980,10 +2992,36 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // favor this processor. TuneCPU = SD->getCPUName(GD.getMultiVersionIndex())->getName(); } + + // For AMDGPU, by default only emit delta features (features that differ + // from the target CPU's defaults). Use -famdgpu-emit-full-target-features + // to emit all features. + if (getTarget().getTriple().isAMDGPU() && + !CodeGenOpts.AMDGPUEmitFullTargetFeatures) { + HandleAMDGPUFeatureDelta(FeatureMap); + } else { + // Produce the canonical string for this set of features. + for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap) + Features.push_back((Entry.getValue() ? "+" : "-") + + Entry.getKey().str()); + } } else { // Otherwise just add the existing target cpu and target features to the // function. 
- Features = getTarget().getTargetOpts().Features; + if (SetTargetFeatures && getTarget().getTriple().isAMDGPU() && + !CodeGenOpts.AMDGPUEmitFullTargetFeatures) { + llvm::StringMap<bool> FeatureMap; + if (FD) { + getContext().getFunctionFeatureMap(FeatureMap, GD); + } else { + getTarget().initFeatureMap(FeatureMap, getContext().getDiagnostics(), + TargetCPU, + getTarget().getTargetOpts().Features); + } + HandleAMDGPUFeatureDelta(FeatureMap); + } else { + Features = getTarget().getTargetOpts().Features; + } } if (!TargetCPU.empty()) { diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c index f6e45bf573705..d03fd6fc66d03 100644 --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in // CHECK-LABEL: @attr_incompatible // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] { -// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" } // CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } // CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } // CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl index 38b5ed8de34cc..ece84d5b75ca7 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; } // CHECK-NEXT: ret void // //. 
-// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+mcast-load-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" } -// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" 
"target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+mcast-load-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" } +// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "uniform-work-group-size"="false" } +// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" } // CHECK: attributes #[[ATTR2]] = { convergent nounwind } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 2cbc9787a04b0..c2d7616a33754 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -816,12 +816,12 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: attributes #[[ATTR10]] = { convergent nounwind } //. 
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } -// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" } -// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" } -// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" } +// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" "uniform-work-group-size"="false" } +// GFX900: attributes #[[ATTR3]] = { alwaysinline 
convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" } // GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } // GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" } // GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } // GFX900: attributes #[[ATTR8]] = { convergent nounwind } // GFX900: attributes #[[ATTR9]] = { nounwind } diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl b/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl new file mode 100644 index 0000000000000..d41a029c084b1 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl @@ -0,0 +1,70 @@ +// REQUIRES: amdgpu-registered-target + +// Test that by default, AMDGPU functions only emit delta target-features +// (features that differ from the target CPU's defaults). This reduces IR bloat. + +// Default behavior for gfx90a: test_default has no target-features, +// test_explicit_attr has only the delta (+gfx11-insts). 
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s + +// With -famdgpu-emit-full-target-features, all features are emitted for both functions. +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -famdgpu-emit-full-target-features -emit-llvm -o - %s | FileCheck --check-prefix=FULL %s + +// With -target-feature, both functions get the delta feature. +// gfx1030 defaults to wavefrontsize32, so +wavefrontsize64 is a delta. +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1030 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=CMDLINE %s + +// GFX90A-LABEL: define {{.*}} @test_default() +// GFX90A-SAME: #[[ATTR_DEFAULT:[0-9]+]] +// GFX90A-LABEL: define {{.*}} @test_explicit_attr() +// GFX90A-SAME: #[[ATTR_EXPLICIT:[0-9]+]] +// +// test_default should have target-cpu but NO target-features +// GFX90A: attributes #[[ATTR_DEFAULT]] = { +// GFX90A-SAME: "target-cpu"="gfx90a" +// GFX90A-NOT: "target-features" +// GFX90A-SAME: } +// +// test_explicit_attr should have target-cpu and ONLY the delta target-features +// GFX90A: attributes #[[ATTR_EXPLICIT]] = { +// GFX90A-SAME: "target-cpu"="gfx90a" +// GFX90A-SAME: "target-features"="+gfx11-insts" +// GFX90A-SAME: } + +// With -famdgpu-emit-full-target-features, both functions get full features. +// FULL-LABEL: define {{.*}} @test_default() +// FULL-SAME: #[[ATTR_DEFAULT:[0-9]+]] +// FULL-LABEL: define {{.*}} @test_explicit_attr() +// FULL-SAME: #[[ATTR_EXPLICIT:[0-9]+]] +// +// FULL: attributes #[[ATTR_DEFAULT]] = { +// FULL-SAME: "target-cpu"="gfx90a" +// FULL-SAME: "target-features"="{{[^"]+}}" +// FULL-SAME: } +// +// FULL: attributes #[[ATTR_EXPLICIT]] = { +// FULL-SAME: "target-cpu"="gfx90a" +// FULL-SAME: "target-features"="{{[^"]+}}" +// FULL-SAME: } + +// With -target-feature +wavefrontsize64, test_default gets just that delta, +// test_explicit_attr gets both +gfx11-insts and +wavefrontsize64. 
+// CMDLINE-LABEL: define {{.*}} @test_default() +// CMDLINE-SAME: #[[ATTR_DEFAULT:[0-9]+]] +// CMDLINE-LABEL: define {{.*}} @test_explicit_attr() +// CMDLINE-SAME: #[[ATTR_EXPLICIT:[0-9]+]] +// +// CMDLINE: attributes #[[ATTR_DEFAULT]] = { +// CMDLINE-SAME: "target-cpu"="gfx1030" +// CMDLINE-SAME: "target-features"="+wavefrontsize64" +// CMDLINE-SAME: } +// +// CMDLINE: attributes #[[ATTR_EXPLICIT]] = { +// CMDLINE-SAME: "target-cpu"="gfx1030" +// CMDLINE-SAME: "target-features"="{{[^"]*}}+gfx11-insts{{[^"]*}}+wavefrontsize64{{[^"]*}}" +// CMDLINE-SAME: } + +kernel void test_default() {} + +__attribute__((target("gfx11-insts"))) +void test_explicit_attr() {} diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index df5b56890dd5c..b3f844739e5c5 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -1,63 +1,63 @@ // REQUIRES: amdgpu-registered-target // Check that appropriate features are defined for every supported AMDGPU -// "-target" and "-mcpu" options. +// "-target" and "-mcpu" options when -famdgpu-emit-full-target-features is used. 
-// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s -// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s -// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx602 -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx701 -emit-llvm -o - %s | FileCheck --check-prefix=GFX701 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx702 -emit-llvm -o - %s | FileCheck --check-prefix=GFX702 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx703 -emit-llvm -o - %s | FileCheck --check-prefix=GFX703 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx704 -emit-llvm -o - %s | FileCheck --check-prefix=GFX704 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx705 -emit-llvm -o - %s | FileCheck --check-prefix=GFX705 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx801 -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx802 -emit-llvm -o - %s | FileCheck 
--check-prefix=GFX802 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx803 -emit-llvm -o - %s | FileCheck --check-prefix=GFX803 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx805 -emit-llvm -o - %s | FileCheck --check-prefix=GFX805 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx810 -emit-llvm -o - %s | FileCheck --check-prefix=GFX810 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefix=GFX900 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx902 -emit-llvm -o - %s | FileCheck --check-prefix=GFX902 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx908 -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx909 -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1013 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1013 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1030 -emit-llvm -o - %s | FileCheck 
--check-prefix=GFX1030 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1031 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1031 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1032 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1032 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1033 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1033 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1034 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1034 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1035 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1035 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1036 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1036 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1101 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1150 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1151 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1152 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1153 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1201 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1201 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1250 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1251 -emit-llvm -o 
- %s | FileCheck --check-prefix=GFX1251 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx600 -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx601 -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx602 -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx700 -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx701 -emit-llvm -o - %s | FileCheck --check-prefix=GFX701 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx702 -emit-llvm -o - %s | FileCheck --check-prefix=GFX702 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx703 -emit-llvm -o - %s | FileCheck --check-prefix=GFX703 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx704 -emit-llvm -o - %s | FileCheck --check-prefix=GFX704 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx705 -emit-llvm -o - %s | FileCheck --check-prefix=GFX705 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx801 -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx802 -emit-llvm -o - %s | FileCheck --check-prefix=GFX802 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx803 -emit-llvm -o - %s | FileCheck --check-prefix=GFX803 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx805 -emit-llvm -o - %s | FileCheck --check-prefix=GFX805 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features 
-target-cpu gfx810 -emit-llvm -o - %s | FileCheck --check-prefix=GFX810 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefix=GFX900 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx902 -emit-llvm -o - %s | FileCheck --check-prefix=GFX902 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx904 -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx906 -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx908 -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx909 -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx90c -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s +// RUN: %clang_cc1 -triple 
amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1013 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1013 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1030 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1030 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1031 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1031 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1032 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1032 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1033 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1033 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1034 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1034 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1035 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1035 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1036 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1036 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1101 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1150 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1151 -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX1151 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1152 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1153 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1201 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1201 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1250 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1251 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1251 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s +// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s // NOCPU-NOT: "target-features" // NOCPU-WAVE32: "target-features"="+wavefrontsize32" diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl index 2d50ce7cab2e0..1239df2a96d2e 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl @@ 
-4,13 +4,11 @@ // if there is no target specified. // RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s -// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s __attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {} // NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts" -// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" -// GFX1200: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32" +// CHECK-NOT: "target-features"={{.*}} diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp index 03f5c31e3157c..ad7d19cbbd55c 100644 --- a/clang/test/OpenMP/amdgcn-attributes.cpp +++ b/clang/test/OpenMP/amdgcn-attributes.cpp @@ -32,9 +32,9 @@ int callable(int x) { } // DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" "uniform-work-group-size"="true" } +// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "uniform-work-group-size"="true" } // NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" 
"amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" } +// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" } // NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
