https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/126253
Backport 6424abcd6c9c6aa8171c79d0fe0369d3a10da3d5 Requested by: @davemgreen >From 521366d7d3d97b8c61e6c2a868c20149ef80263a Mon Sep 17 00:00:00 2001 From: David Green <david.gr...@arm.com> Date: Fri, 7 Feb 2025 10:16:57 +0000 Subject: [PATCH] [AArch64] Enable AvoidLDAPUR for cpu=generic between armv8.4 and armv9.3. (#125261) As added in #124274, CPUs in this range can suffer from performance issues with ldapur. As the gain from ldar->ldapr is expected to be greater than the minor gain from ldapr->ldapur, this opts to avoid the instruction under the default -mcpu=generic when the -march is less that armv8.8 / armv9.3. I renamed AArch64Subtarget::Others to AArch64Subtarget::Generic to be clearer what it means. (cherry picked from commit 6424abcd6c9c6aa8171c79d0fe0369d3a10da3d5) --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 7 ++++++- llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 ++-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +- .../AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll | 7 +++++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index bc921f07e1dbf89..809e658e6538017 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -125,7 +125,12 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { // this in the future so we can specify it together with the subtarget // features. switch (ARMProcFamily) { - case Others: + case Generic: + // Using TuneCPU=generic we avoid ldapur instructions to line up with the + // cpus that use the AvoidLDAPUR feature. We don't want this to be on + // forever, so it is enabled between armv8.4 and armv8.7/armv9.2. + if (hasV8_4aOps() && !hasV8_8aOps()) + AvoidLDAPUR = true; break; case Carmel: CacheLineSize = 64; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index d22991224d496d9..dca5f5393fe47b3 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -38,7 +38,7 @@ class Triple; class AArch64Subtarget final : public AArch64GenSubtargetInfo { public: enum ARMProcFamilyEnum : uint8_t { - Others, + Generic, #define ARM_PROCESSOR_FAMILY(ENUM) ENUM, #include "llvm/TargetParser/AArch64TargetParserDef.inc" #undef ARM_PROCESSOR_FAMILY @@ -46,7 +46,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { protected: /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. - ARMProcFamilyEnum ARMProcFamily = Others; + ARMProcFamilyEnum ARMProcFamily = Generic; // Enable 64-bit vectorization in SLP. unsigned MinVectorRegisterBitWidth = 64; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 4af3c482e65984b..cd994d53a60088f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4272,7 +4272,7 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Others, so by // checking for that case, we can ensure that the default behaviour is // unchanged - if (ST->getProcFamily() != AArch64Subtarget::Others && + if (ST->getProcFamily() != AArch64Subtarget::Generic && !ST->getSchedModel().isOutOfOrder()) { UP.Runtime = true; UP.Partial = true; diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll index b475e68db411a42..02ff12c27fcda9d 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll @@ -1,12 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "(?!^\s*lda.*\bsp\b)^\s*.*\bsp\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)" -; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL -; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo,avoid-ldapur -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v2 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x4 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR ; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x925 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9.3a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) { ; CHECK-LABEL: load_atomic_i8_aligned_unordered: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits