Author: Stanislav Mekhanoshin Date: 2025-08-27T01:21:14-07:00 New Revision: 9cca295dccfa8bcefdd7f3ba512d5d042ae57ca8
URL: https://github.com/llvm/llvm-project/commit/9cca295dccfa8bcefdd7f3ba512d5d042ae57ca8 DIFF: https://github.com/llvm/llvm-project/commit/9cca295dccfa8bcefdd7f3ba512d5d042ae57ca8.diff LOG: [AMDGPU] More radical feature initialization refactoring (#155222) Factoring in flang, just have a single fillAMDGPUFeatureMap function doing it all as an external interface and returning an error. Added: Modified: clang/lib/Basic/Targets/AMDGPU.cpp flang/lib/Frontend/CompilerInstance.cpp llvm/include/llvm/TargetParser/TargetParser.h llvm/lib/TargetParser/TargetParser.cpp Removed: ################################################################################ diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index a235cccac516b..87de9e6865e71 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -201,8 +201,7 @@ bool AMDGPUTargetInfo::initFeatureMap( if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) return false; - // TODO: Should move this logic into TargetParser - auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features); + auto HasError = fillAMDGPUFeatureMap(CPU, getTriple(), Features); switch (HasError.first) { default: break; diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp index cd8dddad05282..d97b4b8af6d61 100644 --- a/flang/lib/Frontend/CompilerInstance.cpp +++ b/flang/lib/Frontend/CompilerInstance.cpp @@ -253,18 +253,15 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags, const TargetOptions &targetOpts, const llvm::Triple triple) { llvm::StringRef cpu = targetOpts.cpu; - llvm::StringMap<bool> implicitFeaturesMap; - // Get the set of implicit target features - llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap); + llvm::StringMap<bool> FeaturesMap; // Add target features specified by the user for (auto &userFeature : targetOpts.featuresAsWritten) { std::string userKeyString = 
userFeature.substr(1); - implicitFeaturesMap[userKeyString] = (userFeature[0] == '+'); + FeaturesMap[userKeyString] = (userFeature[0] == '+'); } - auto HasError = - llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap); + auto HasError = llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, FeaturesMap); if (HasError.first) { unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, "Unsupported feature ID: %0"); @@ -273,9 +270,9 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags, } llvm::SmallVector<std::string> featuresVec; - for (auto &implicitFeatureItem : implicitFeaturesMap) { - featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") + - implicitFeatureItem.first().str()) + for (auto &FeatureItem : FeaturesMap) { + featuresVec.push_back((llvm::Twine(FeatureItem.second ? "+" : "-") + + FeatureItem.first().str()) .str()); } llvm::sort(featuresVec); diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 2f68d66dee90f..f8a4ee2f6fcfd 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -183,15 +183,11 @@ LLVM_ABI void fillValidArchListR600(SmallVectorImpl<StringRef> &Values); LLVM_ABI IsaVersion getIsaVersion(StringRef GPU); -/// Fills Features map with default values for given target GPU -LLVM_ABI void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, - StringMap<bool> &Features); - -/// Inserts wave size feature for given GPU into features map +/// Fills Features map with default values for given target GPU. +/// \p Features contains overriding target features and this function returns +/// default target features with entries overridden by \p Features. 
LLVM_ABI std::pair<FeatureError, StringRef> -insertWaveSizeFeature(StringRef GPU, const Triple &T, - StringMap<bool> &Features); - +fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap<bool> &Features); } // namespace AMDGPU struct BasicSubtargetFeatureKV { diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 480622d6338fc..2194ef4df14d6 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -364,8 +364,326 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) { return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind); } -void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, - StringMap<bool> &Features) { +static std::pair<FeatureError, StringRef> +insertWaveSizeFeature(StringRef GPU, const Triple &T, + const StringMap<bool> &DefaultFeatures, + StringMap<bool> &Features) { + const bool IsNullGPU = GPU.empty(); + const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32"); + const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64"); + const bool HaveWave32 = Features.count("wavefrontsize32"); + const bool HaveWave64 = Features.count("wavefrontsize64"); + if (HaveWave32 && HaveWave64) + return {AMDGPU::INVALID_FEATURE_COMBINATION, + "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; + + if (HaveWave32 && !IsNullGPU && TargetHasWave64) + return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; + + if (HaveWave64 && !IsNullGPU && TargetHasWave32) + return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"}; + + // Don't assume any wavesize with an unknown subtarget. + // Default to wave32 if target supports both. 
+ if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 && + !TargetHasWave64) + Features.insert(std::make_pair("wavefrontsize32", true)); + + for (const auto &Entry : DefaultFeatures) { + if (!Features.count(Entry.getKey())) + Features[Entry.getKey()] = Entry.getValue(); + } + + return {NO_ERROR, StringRef()}; +} + +/// Fills Features map with default values for given target GPU. +/// \p Features contains overriding target features and this function returns +/// default target features with entries overridden by \p Features. +static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, + StringMap<bool> &Features) { + AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); + switch (Kind) { + case GK_GFX1250: + Features["ci-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["gfx12-insts"] = true; + Features["gfx1250-insts"] = true; + Features["bitop3-insts"] = true; + Features["prng-inst"] = true; + Features["tanh-insts"] = true; + Features["tensor-cvt-lut-insts"] = true; + Features["transpose-load-f4f6-insts"] = true; + Features["bf16-trans-insts"] = true; + Features["bf16-cvt-insts"] = true; + Features["fp8-conversion-insts"] = true; + Features["fp8e5m3-insts"] = true; + Features["permlane16-swap"] = true; + Features["ashr-pk-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["vmem-pref-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["setprio-inc-wg-inst"] = true; + 
Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize32"] = true; + break; + case GK_GFX1201: + case GK_GFX1200: + case GK_GFX12_GENERIC: + Features["ci-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dot9-insts"] = true; + Features["dot10-insts"] = true; + Features["dot11-insts"] = true; + Features["dot12-insts"] = true; + Features["dl-insts"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["gfx12-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["image-insts"] = true; + Features["fp8-conversion-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + break; + case GK_GFX1153: + case GK_GFX1152: + case GK_GFX1151: + case GK_GFX1150: + case GK_GFX1103: + case GK_GFX1102: + case GK_GFX1101: + case GK_GFX1100: + case GK_GFX11_GENERIC: + Features["ci-insts"] = true; + Features["dot5-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dot9-insts"] = true; + Features["dot10-insts"] = true; + Features["dot12-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["image-insts"] = true; + Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = 
true; + break; + case GK_GFX1036: + case GK_GFX1035: + case GK_GFX1034: + case GK_GFX1033: + case GK_GFX1032: + case GK_GFX1031: + case GK_GFX1030: + case GK_GFX10_3_GENERIC: + Features["ci-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["image-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + break; + case GK_GFX1012: + case GK_GFX1011: + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX1013: + case GK_GFX1010: + case GK_GFX10_1_GENERIC: + Features["dl-insts"] = true; + Features["ci-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["image-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + break; + case GK_GFX950: + Features["bitop3-insts"] = true; + Features["fp6bf6-cvt-scale-insts"] = true; + Features["fp4-cvt-scale-insts"] = true; + Features["bf8-cvt-scale-insts"] = true; + Features["fp8-cvt-scale-insts"] = true; + 
Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; + Features["f32-to-f16bf16-cvt-sr-insts"] = true; + Features["prng-inst"] = true; + Features["permlane16-swap"] = true; + Features["permlane32-swap"] = true; + Features["ashr-pk-insts"] = true; + Features["dot12-insts"] = true; + Features["dot13-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["gfx950-insts"] = true; + [[fallthrough]]; + case GK_GFX942: + Features["fp8-insts"] = true; + Features["fp8-conversion-insts"] = true; + if (Kind != GK_GFX950) + Features["xf32-insts"] = true; + [[fallthrough]]; + case GK_GFX9_4_GENERIC: + Features["gfx940-insts"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["gfx90a-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["dot3-insts"] = true; + Features["dot4-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["mai-insts"] = true; + Features["dl-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx8-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["s-memrealtime"] = true; + Features["ci-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_GFX90A: + Features["gfx90a-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + [[fallthrough]]; + case GK_GFX908: + Features["dot3-insts"] = true; + Features["dot4-insts"] = true; + 
Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["mai-insts"] = true; + [[fallthrough]]; + case GK_GFX906: + Features["dl-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX90C: + case GK_GFX909: + case GK_GFX904: + case GK_GFX902: + case GK_GFX900: + case GK_GFX9_GENERIC: + Features["gfx9-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; + [[fallthrough]]; + case GK_GFX810: + case GK_GFX805: + case GK_GFX803: + case GK_GFX802: + case GK_GFX801: + Features["gfx8-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["s-memrealtime"] = true; + Features["ci-insts"] = true; + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_GFX705: + case GK_GFX704: + case GK_GFX703: + case GK_GFX702: + case GK_GFX701: + case GK_GFX700: + Features["ci-insts"] = true; + [[fallthrough]]; + case GK_GFX602: + case GK_GFX601: + case GK_GFX600: + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_NONE: + break; + default: + llvm_unreachable("Unhandled GPU!"); + } +} + +/// Fills Features map with default values for given target GPU. +/// \p Features contains overriding target features and this function returns +/// default target features with entries overridden by \p Features. +std::pair<FeatureError, StringRef> +AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, + StringMap<bool> &Features) { // XXX - What does the member GPU mean if device name string passed here? if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) { // AMDGCN SPIRV must support the union of all AMDGCN features. 
This list @@ -434,280 +752,9 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["wavefrontsize32"] = true; Features["wavefrontsize64"] = true; } else if (T.isAMDGCN()) { - AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); - switch (Kind) { - case GK_GFX1250: - Features["ci-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["gfx12-insts"] = true; - Features["gfx1250-insts"] = true; - Features["bitop3-insts"] = true; - Features["prng-inst"] = true; - Features["tanh-insts"] = true; - Features["tensor-cvt-lut-insts"] = true; - Features["transpose-load-f4f6-insts"] = true; - Features["bf16-trans-insts"] = true; - Features["bf16-cvt-insts"] = true; - Features["fp8-conversion-insts"] = true; - Features["fp8e5m3-insts"] = true; - Features["permlane16-swap"] = true; - Features["ashr-pk-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["vmem-pref-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["setprio-inc-wg-inst"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - Features["wavefrontsize32"] = true; - break; - case GK_GFX1201: - case GK_GFX1200: - case GK_GFX12_GENERIC: - Features["ci-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dot9-insts"] = true; - Features["dot10-insts"] = true; - Features["dot11-insts"] = true; - Features["dot12-insts"] = true; - Features["dl-insts"] = true; - 
Features["atomic-ds-pk-add-16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["gfx12-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["image-insts"] = true; - Features["fp8-conversion-insts"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - break; - case GK_GFX1153: - case GK_GFX1152: - case GK_GFX1151: - case GK_GFX1150: - case GK_GFX1103: - case GK_GFX1102: - case GK_GFX1101: - case GK_GFX1100: - case GK_GFX11_GENERIC: - Features["ci-insts"] = true; - Features["dot5-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dot9-insts"] = true; - Features["dot10-insts"] = true; - Features["dot12-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["image-insts"] = true; - Features["gws"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - break; - case GK_GFX1036: - case GK_GFX1035: - case GK_GFX1034: - case GK_GFX1033: - case GK_GFX1032: - case GK_GFX1031: - case GK_GFX1030: - case GK_GFX10_3_GENERIC: - Features["ci-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - 
Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["image-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - break; - case GK_GFX1012: - case GK_GFX1011: - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX1013: - case GK_GFX1010: - case GK_GFX10_1_GENERIC: - Features["dl-insts"] = true; - Features["ci-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["image-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - break; - case GK_GFX950: - Features["bitop3-insts"] = true; - Features["fp6bf6-cvt-scale-insts"] = true; - Features["fp4-cvt-scale-insts"] = true; - Features["bf8-cvt-scale-insts"] = true; - Features["fp8-cvt-scale-insts"] = true; - Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; - Features["f32-to-f16bf16-cvt-sr-insts"] = true; - Features["prng-inst"] = true; - Features["permlane16-swap"] = true; - Features["permlane32-swap"] = true; - Features["ashr-pk-insts"] = true; - Features["dot12-insts"] = true; - Features["dot13-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["gfx950-insts"] = true; - [[fallthrough]]; - case GK_GFX942: - Features["fp8-insts"] = true; - 
Features["fp8-conversion-insts"] = true; - if (Kind != GK_GFX950) - Features["xf32-insts"] = true; - [[fallthrough]]; - case GK_GFX9_4_GENERIC: - Features["gfx940-insts"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["gfx90a-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["dot3-insts"] = true; - Features["dot4-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["mai-insts"] = true; - Features["dl-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx8-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["s-memrealtime"] = true; - Features["ci-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - Features["wavefrontsize64"] = true; - break; - case GK_GFX90A: - Features["gfx90a-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - [[fallthrough]]; - case GK_GFX908: - Features["dot3-insts"] = true; - Features["dot4-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["mai-insts"] = true; - [[fallthrough]]; - case GK_GFX906: - Features["dl-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX90C: - case GK_GFX909: - case GK_GFX904: - case GK_GFX902: - case GK_GFX900: - case GK_GFX9_GENERIC: - Features["gfx9-insts"] = true; - 
Features["vmem-to-lds-load-insts"] = true; - [[fallthrough]]; - case GK_GFX810: - case GK_GFX805: - case GK_GFX803: - case GK_GFX802: - case GK_GFX801: - Features["gfx8-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["s-memrealtime"] = true; - Features["ci-insts"] = true; - Features["image-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["wavefrontsize64"] = true; - break; - case GK_GFX705: - case GK_GFX704: - case GK_GFX703: - case GK_GFX702: - case GK_GFX701: - case GK_GFX700: - Features["ci-insts"] = true; - [[fallthrough]]; - case GK_GFX602: - case GK_GFX601: - case GK_GFX600: - Features["image-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["atomic-fmin-fmax-global-f32"] = true; - Features["atomic-fmin-fmax-global-f64"] = true; - Features["wavefrontsize64"] = true; - break; - case GK_NONE: - break; - default: - llvm_unreachable("Unhandled GPU!"); - } + StringMap<bool> DefaultFeatures; + fillAMDGCNFeatureMap(GPU, T, DefaultFeatures); + return insertWaveSizeFeature(GPU, T, DefaultFeatures, Features); } else { if (GPU.empty()) GPU = "r600"; @@ -736,39 +783,5 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, llvm_unreachable("Unhandled GPU!"); } } -} - -std::pair<FeatureError, StringRef> -AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T, - StringMap<bool> &Features) { - StringMap<bool> DefaultFeatures; - fillAMDGPUFeatureMap(GPU, T, DefaultFeatures); - - const bool IsNullGPU = GPU.empty(); - const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32"); - const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64"); - const bool HaveWave32 = Features.count("wavefrontsize32"); - const bool HaveWave64 = Features.count("wavefrontsize64"); - if (HaveWave32 && HaveWave64) { - return {AMDGPU::INVALID_FEATURE_COMBINATION, - "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; - } - if 
(HaveWave32 && !IsNullGPU && TargetHasWave64) { - return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; - } - if (HaveWave64 && !IsNullGPU && TargetHasWave32) { - return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"}; - } - // Don't assume any wavesize with an unknown subtarget. - // Default to wave32 if target supports both. - if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 && - !TargetHasWave64) - Features.insert(std::make_pair("wavefrontsize32", true)); - - for (const auto &Entry : DefaultFeatures) { - if (!Features.count(Entry.getKey())) - Features[Entry.getKey()] = Entry.getValue(); - } - return {NO_ERROR, StringRef()}; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits