https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/129781
>From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Fri, 28 Feb 2025 14:41:56 -0800 Subject: [PATCH 1/4] [CodeGen][StaticDataSplitter]Support constant pool partitioning --- llvm/include/llvm/CodeGen/AsmPrinter.h | 8 + .../CodeGen/TargetLoweringObjectFileImpl.h | 6 + .../llvm/Target/TargetLoweringObjectFile.h | 7 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 22 ++- llvm/lib/CodeGen/StaticDataSplitter.cpp | 56 +++++-- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 35 +++++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 10 ++ llvm/lib/Target/TargetLoweringObjectFile.cpp | 10 ++ llvm/lib/Target/X86/X86AsmPrinter.cpp | 10 ++ .../AArch64/constant-pool-partition.ll | 141 ++++++++++++++++++ .../CodeGen/X86/constant-pool-partition.ll | 131 ++++++++++++++++ 11 files changed, 422 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 3da63af5ba571..2018f411be796 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -18,6 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass { /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize = nullptr; + /// Provides the profile information for constants. + const StaticDataProfileInfo *SDPI = nullptr; + + /// The profile summary information. 
+ const ProfileSummaryInfo *PSI = nullptr; + /// Map a basic block section ID to the begin and end symbols of that section /// which determine the section's range. struct MBBSectionRange { diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 10f0594c267ae..563980fb24ab8 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { const Constant *C, Align &Alignment) const override; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, Align &Alignment, + StringRef SectionSuffix) const override; + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index a5ed1b29dc1bc..1956748b8058b 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { SectionKind Kind, const Constant *C, Align &Alignment) const; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. 
+ virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, const Constant *C, + Align &Alignment, + StringRef SectionSuffix) const; + virtual MCSection * getSectionForMachineBasicBlock(const Function &F, const MachineBasicBlock &MBB, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3c4280333e76d..60018afe2f8a7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - MCSection *S = getObjFileLowering().getSectionForConstant( - getDataLayout(), Kind, C, Alignment); + MCSection *S = nullptr; + if (TM.Options.EnableStaticDataPartitioning) { + SmallString<8> SectionNameSuffix; + if (C && SDPI && PSI) { + auto Count = SDPI->getConstantProfileCount(C); + if (Count) { + if (PSI->isHotCount(*Count)) { + SectionNameSuffix.append("hot"); + } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) { + SectionNameSuffix.append("unlikely"); + } + } + } + + S = getObjFileLowering().getSectionForConstant( + getDataLayout(), Kind, C, Alignment, SectionNameSuffix); + } else { + S = getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C, + Alignment); + } // The number of sections are small, just do a linear search from the // last section to the first. 
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index c647c3075d79c..4768c0829ea49 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -10,7 +10,7 @@ // for the following types of static data: // - Jump tables // - Module-internal global variables -// - Constant pools (TODO) +// - Constant pools // // For the original RFC of this pass please see // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744 @@ -117,16 +117,17 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + const MachineConstantPool *MCP = MF.getConstantPool(); // Jump table could be used by either terminating instructions or // non-terminating ones, so we walk all instructions and use // `MachineOperand::isJTI()` to identify jump table operands. - // Similarly, `MachineOperand::isCPI()` can identify constant pool usages - // in the same loop. + // Similarly, `MachineOperand::isCPI()` is used to identify constant pool + // usages in the same loop. for (const auto &MBB : MF) { for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { - if (!Op.isJTI() && !Op.isGlobal()) + if (!Op.isJTI() && !Op.isGlobal() && !Op.isCPI()) continue; std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); @@ -148,7 +149,7 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { if (MJTI->updateJumpTableEntryHotness(JTI, Hotness)) ++NumChangedJumpTables; - } else { + } else if (Op.isGlobal()) { // Find global variables with local linkage. 
const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); @@ -159,6 +160,20 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { !inStaticDataSection(GV, TM)) continue; SDPI->addConstantProfileCount(GV, Count); + } else { + assert(Op.isCPI() && "Op must be constant pool index in this branch"); + int CPI = Op.getIndex(); + if (CPI == -1) + continue; + + assert(MCP != nullptr && "Constant pool info is not available."); + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + + if (CPE.isMachineConstantPoolEntry()) + continue; + + const Constant *C = CPE.Val.ConstVal; + SDPI->addConstantProfileCount(C, Count); } } } @@ -203,17 +218,34 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { void StaticDataSplitter::annotateStaticDataWithoutProfiles( const MachineFunction &MF) { + const MachineConstantPool *MCP = MF.getConstantPool(); for (const auto &MBB : MF) { for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { - if (!Op.isGlobal()) - continue; - const GlobalVariable *GV = - getLocalLinkageGlobalVariable(Op.getGlobal()); - if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(GV, MF.getTarget())) + if (!Op.isGlobal() && !Op.isCPI()) continue; - SDPI->addConstantProfileCount(GV, std::nullopt); + if (Op.isGlobal()) { + const GlobalVariable *GV = + getLocalLinkageGlobalVariable(Op.getGlobal()); + if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(GV, MF.getTarget())) + continue; + SDPI->addConstantProfileCount(GV, std::nullopt); + } else { + assert(Op.isCPI() && "Op must be constant pool index in this branch"); + int CPI = Op.getIndex(); + if (CPI == -1) + continue; + + assert(MCP != nullptr && "Constant pool info is not available."); + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + + if (CPE.isMachineConstantPoolEntry()) + continue; + + const Constant *C = CPE.Val.ConstVal; + 
SDPI->addConstantProfileCount(C, std::nullopt); + } } } } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index be2f5fb0b4a79..6cf8a0e9d211f 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1072,6 +1072,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( return DataRelROSection; } +MCSection *TargetLoweringObjectFileELF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, + StringRef SectionPrefix) const { + // TODO: Share code between this function and + // MCObjectInfo::initELFMCObjectFileInfo. + if (SectionPrefix.empty()) + return getSectionForConstant(DL, Kind, C, Alignment); + + auto &Context = getContext(); + if (Kind.isMergeableConst4() && MergeableConst4Section) + return Context.getELFSection(".rodata.cst4." + SectionPrefix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 4); + if (Kind.isMergeableConst8() && MergeableConst8Section) + return Context.getELFSection(".rodata.cst8." + SectionPrefix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 8); + if (Kind.isMergeableConst16() && MergeableConst16Section) + return Context.getELFSection(".rodata.cst16." + SectionPrefix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 16); + if (Kind.isMergeableConst32() && MergeableConst32Section) + return Context.getELFSection(".rodata.cst32." + SectionPrefix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 32); + if (Kind.isReadOnly()) + return Context.getELFSection(".rodata" + SectionPrefix, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return Context.getELFSection(".data.rel.ro" + SectionPrefix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); +} + /// Returns a unique section for the given machine basic block. 
MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( const Function &F, const MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index fc38bfe93c1e0..74a78457e42ec 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -226,6 +226,16 @@ class AArch64AsmPrinter : public AsmPrinter { } bool runOnMachineFunction(MachineFunction &MF) override { + auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>(); + if (PSIW) { + PSI = &PSIW->getPSI(); + } + + auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>(); + if (SDPIW) { + SDPI = &SDPIW->getStaticDataProfileInfo(); + } + AArch64FI = MF.getInfo<AArch64FunctionInfo>(); STI = &MF.getSubtarget<AArch64Subtarget>(); diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp index 02c101055d9f3..07f5532bee17e 100644 --- a/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -386,6 +386,16 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant( return DataSection; } +MCSection *TargetLoweringObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, + StringRef SectionPrefix) const { + // Fallback to `getSectionForConstant` without `SectionPrefix` parameter if it + // is empty. 
+ if (SectionPrefix.empty()) + return getSectionForConstant(DL, Kind, C, Alignment); + report_fatal_error("Unimplemented"); +} + MCSection *TargetLoweringObjectFile::getSectionForMachineBasicBlock( const Function &F, const MachineBasicBlock &MBB, const TargetMachine &TM) const { diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 79aa898e18bfa..f58974e79efb9 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -20,6 +20,7 @@ #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -61,6 +62,15 @@ X86AsmPrinter::X86AsmPrinter(TargetMachine &TM, /// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>(); + if (PSIW) { + PSI = &PSIW->getPSI(); + } + + auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>(); + if (SDPIW) { + SDPI = &SDPIW->getStaticDataProfileInfo(); + } Subtarget = &MF.getSubtarget<X86Subtarget>(); SMShadowTracker.startFunction(MF); diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll new file mode 100644 index 0000000000000..5d2df59d34317 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll @@ -0,0 +1,141 @@ +; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Repeat the RUN command above for big-endian systems. 
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Tests that constant pool hotness is aggregated across the module. The +; static-data-splitter processes data from cold_func first, unprofiled_func +; secondly, and then hot_func. Specifically, tests that +; - If a constant is accessed by hot functions, all constant pools for this +; constant (e.g., from an unprofiled function, or cold function) should have +; `.hot` suffix. +; - Similarly if a constant is accessed by both cold function and un-profiled +; function, constant pools for this constant should not have `.unlikely` suffix. + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI0_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK: .LCPI0_1: +; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005 +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI0_2: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff + +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI1_0: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI1_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI2_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK: .section 
.rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI2_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK: .globl val + +define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) + %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01) + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) + %t2 = bitcast <8 x i8> %t1 to <2 x i32> + %3 = extractelement <2 x i32> %t2, i32 1 + %sum = add i32 %2, %3 + %ret = add i32 %sum, %num + ret i32 %ret +} + +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) +declare i32 @func_taking_arbitrary_param(...) + +define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) { + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) + %t2 = bitcast <8 x i8> %t1 to <4 x i16> + %t3 = zext <4 x i16> %t2 to <4 x i32> + %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3 + ret <4 x i1> %cmp +} + +define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { + %2 = tail call i32 (...) 
@func_taking_arbitrary_param(double 6.800000e-01) + %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0> + ret <4 x i1> %b +} + +@val = unnamed_addr constant i256 1 + +define i32 @main(i32 %0, ptr %1) !prof !16 { + br label %7 + +5: ; preds = %7 + %x = call double @double_func() + %a = call <16 x i8> @vector_func_16i8() + %b = call <16 x i8> @vector_func_16i8() + call void @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) + ret i32 0 + +7: ; preds = %7, %2 + %8 = phi i32 [ 0, %2 ], [ %10, %7 ] + %9 = call i32 @rand() + call void @hot_func(i32 %9) + %10 = add i32 %8, 1 + %11 = icmp eq i32 %10, 100000 + br i1 %11, label %5, label %7, !prof !18 +} + +declare i32 @rand() +declare double @double_func() +declare <4 x i32> @vector_func() +declare <16 x i8> @vector_func_16i8() + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460617} +!5 = !{!"MaxCount", i64 849536} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849536} +!8 = !{!"NumCounts", i64 23784} +!9 = !{!"NumFunctions", i64 3301} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15} +!14 = !{i32 990000, i64 166, i32 73} +!15 = !{i32 999999, i64 3, i32 1463} +!16 = !{!"function_entry_count", i64 1} +!17 = !{!"function_entry_count", i64 100000} +!18 = !{!"branch_weights", i32 1, i32 99999} diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll new file mode 100644 index 0000000000000..e39a5d2026dd7 --- /dev/null +++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll @@ -0,0 +1,131 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +; Tests that constant pool hotness is aggregated across the module. 
The +; static-data-splitter processes data from @cold_func first, two functions +; without profiles secondly, and then @hot_func. Specifically, tests that +; 1. If a constant is accessed by hot functions, all constant pools for this +; constant (e.g., from an unprofiled function, or cold function) should have +; .hot suffix. +; 2. Similarly if a constant is accessed by both cold function and un-profiled +; function, constant pools for this constant should not have .unlikely suffix. + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true -data-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true -data-sections=true \ +; RUN: -unique-section-names=true \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=false -data-sections=false \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI0_0: +; CHECK: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 +; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK: .LCPI0_1: +; CHECK: .quad 0x3eb0000000000000 # double 9.5367431640625E-7 + +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI0_2: +; CHECK: .quad 0x3fc0000000000000 # double 0.125 + +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI1_0: +; CHECK: .quad 0x3fc0000000000000 # double 0.125 + +; CHECK: .section .rodata.cst4,"aM",@progbits,4 +; CHECK: .LCPI2_0: +; CHECK: .long 0x3e000000 # float 0.125 + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI3_0: 
+; CHECK: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 +; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI3_1: +; CHECK: .long 2147483648 # 0x80000000 +; CHECK: .long 2147483648 # 0x80000000 +; CHECK: .long 2147483648 # 0x80000000 +; CHECK: .long 2147483648 # 0x80000000 +; CHECK: .LCPI3_2: +; CHECK: .long 2147484090 # 0x800001ba +; CHECK: .long 2147483748 # 0x80000064 +; CHECK: .long 2147483648 # 0x80000000 +; CHECK: .long 2147483648 # 0x80000000 + +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK: .globl val + +define double @cold_func(double %x) !prof !16 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) + %y = fmul double %x, 0x3EB0000000000000 + %z = fmul double %y, 0x3fc0000000000000 + ret double %z +} + +define double @unprofiled_func_double(double %x) { + %z = fmul double %x, 0x3fc0000000000000 + ret double %z +} + +define float @unprofiled_func_float(float %x) { + %z = fmul float %x, 0x3fc0000000000000 + ret float %z +} + + +define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) + %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0> + ret <4 x i1> %b +} + +@val = unnamed_addr constant i256 1 + +define i32 @main(i32 %0, ptr %1) !prof !16 { + br label %7 + +5: ; preds = %7 + %x = call double @double_func() + call void @cold_func(double %x) + ret i32 0 + +7: ; preds = %7, %2 + %8 = phi i32 [ 0, %2 ], [ %10, %7 ] + %9 = call i32 @rand() + call void @hot_func(i32 %9) + %10 = add i32 %8, 1 + %11 = icmp eq i32 %10, 100000 + br i1 %11, label %5, label %7, !prof !18 +} + +declare i32 @rand() +declare double @double_func() +declare i32 @func_taking_arbitrary_param(...) 
+ +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460617} +!5 = !{!"MaxCount", i64 849536} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849536} +!8 = !{!"NumCounts", i64 23784} +!9 = !{!"NumFunctions", i64 3301} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15} +!14 = !{i32 990000, i64 166, i32 73} +!15 = !{i32 999999, i64 1, i32 1463} +!16 = !{!"function_entry_count", i64 1} +!17 = !{!"function_entry_count", i64 100000} +!18 = !{!"branch_weights", i32 1, i32 99999} >From 9fae47c06f8d559bd90ddec9be6b0cd34131bbd6 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Wed, 5 Mar 2025 16:15:15 -0800 Subject: [PATCH 2/4] resolve comments --- llvm/include/llvm/CodeGen/AsmPrinter.h | 4 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 39 ++++---- llvm/lib/CodeGen/StaticDataSplitter.cpp | 92 +++++++++---------- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 16 ++-- 4 files changed, 73 insertions(+), 78 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 2018f411be796..bd0f5ada805ab 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -338,6 +338,10 @@ class AsmPrinter : public MachineFunctionPass { DwarfUsesRelocationsAcrossSections = Enable; } + // Returns a section suffix (hot or unlikely) for the constant if profiles + // are available. Returns empty string otherwise. + StringRef getConstantSectionSuffix(const Constant *C) const; + //===------------------------------------------------------------------===// // XRay instrumentation implementation. 
//===------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 60018afe2f8a7..bec3e718bd11b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2769,6 +2769,23 @@ namespace { } // end anonymous namespace +StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const { + SmallString<8> SectionNameSuffix; + if (TM.Options.EnableStaticDataPartitioning) { + if (C && SDPI && PSI) { + auto Count = SDPI->getConstantProfileCount(C); + if (Count) { + if (PSI->isHotCount(*Count)) { + SectionNameSuffix.append("hot"); + } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) { + SectionNameSuffix.append("unlikely"); + } + } + } + } + return SectionNameSuffix.str(); +} + /// EmitConstantPool - Print to the current output stream assembly /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by @@ -2791,26 +2808,8 @@ void AsmPrinter::emitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - MCSection *S = nullptr; - if (TM.Options.EnableStaticDataPartitioning) { - SmallString<8> SectionNameSuffix; - if (C && SDPI && PSI) { - auto Count = SDPI->getConstantProfileCount(C); - if (Count) { - if (PSI->isHotCount(*Count)) { - SectionNameSuffix.append("hot"); - } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) { - SectionNameSuffix.append("unlikely"); - } - } - } - - S = getObjFileLowering().getSectionForConstant( - getDataLayout(), Kind, C, Alignment, SectionNameSuffix); - } else { - S = getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C, - Alignment); - } + MCSection *S = getObjFileLowering().getSectionForConstant( + getDataLayout(), Kind, C, Alignment, getConstantSectionSuffix(C)); // The number of sections are small, just do a linear search from 
the // last section to the first. diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 4768c0829ea49..df5ae7c2e8369 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -69,6 +69,11 @@ class StaticDataSplitter : public MachineFunctionPass { void annotateStaticDataWithoutProfiles(const MachineFunction &MF); + // Returns the constant if the operand refers to a global variable or constant + // that gets lowered to static data sections. Otherwise, return nullptr. + const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM, + const MachineConstantPool *MCP); + public: static char ID; @@ -112,12 +117,42 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { return Changed; } +const Constant * +StaticDataSplitter::getConstant(const MachineOperand &Op, + const TargetMachine &TM, + const MachineConstantPool *MCP) { + if (!Op.isGlobal() && !Op.isCPI()) + return nullptr; + + if (Op.isGlobal()) { + // Find global variables with local linkage. + const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); + // Skip 'special' global variables conservatively because they are + // often handled specially, and skip those not in static data + // sections. 
+ if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(GV, TM)) + return nullptr; + return GV; + } + assert(Op.isCPI() && "Op must be constant pool index in this branch"); + int CPI = Op.getIndex(); + if (CPI == -1) + return nullptr; + + assert(MCP != nullptr && "Constant pool info is not available."); + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + + if (CPE.isMachineConstantPoolEntry()) + return nullptr; + + return CPE.Val.ConstVal; +} + bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { int NumChangedJumpTables = 0; - const TargetMachine &TM = MF.getTarget(); MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); - const MachineConstantPool *MCP = MF.getConstantPool(); // Jump table could be used by either terminating instructions or // non-terminating ones, so we walk all instructions and use @@ -149,30 +184,8 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { if (MJTI->updateJumpTableEntryHotness(JTI, Hotness)) ++NumChangedJumpTables; - } else if (Op.isGlobal()) { - // Find global variables with local linkage. - const GlobalVariable *GV = - getLocalLinkageGlobalVariable(Op.getGlobal()); - // Skip 'special' global variables conservatively because they are - // often handled specially, and skip those not in static data - // sections. 
- if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(GV, TM)) - continue; - SDPI->addConstantProfileCount(GV, Count); - } else { - assert(Op.isCPI() && "Op must be constant pool index in this branch"); - int CPI = Op.getIndex(); - if (CPI == -1) - continue; - - assert(MCP != nullptr && "Constant pool info is not available."); - const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; - - if (CPE.isMachineConstantPoolEntry()) - continue; - - const Constant *C = CPE.Val.ConstVal; + } else if (const Constant *C = + getConstant(Op, MF.getTarget(), MF.getConstantPool())) { SDPI->addConstantProfileCount(C, Count); } } @@ -218,34 +231,13 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { void StaticDataSplitter::annotateStaticDataWithoutProfiles( const MachineFunction &MF) { - const MachineConstantPool *MCP = MF.getConstantPool(); for (const auto &MBB : MF) { for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { - if (!Op.isGlobal() && !Op.isCPI()) - continue; - if (Op.isGlobal()) { - const GlobalVariable *GV = - getLocalLinkageGlobalVariable(Op.getGlobal()); - if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(GV, MF.getTarget())) - continue; - SDPI->addConstantProfileCount(GV, std::nullopt); - } else { - assert(Op.isCPI() && "Op must be constant pool index in this branch"); - int CPI = Op.getIndex(); - if (CPI == -1) - continue; - - assert(MCP != nullptr && "Constant pool info is not available."); - const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; - - if (CPE.isMachineConstantPoolEntry()) - continue; - - const Constant *C = CPE.Val.ConstVal; + const Constant *C = + getConstant(Op, MF.getTarget(), MF.getConstantPool()); + if (C) SDPI->addConstantProfileCount(C, std::nullopt); - } } } } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 6cf8a0e9d211f..ad9c7f099df56 100644 --- 
a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1074,35 +1074,35 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( MCSection *TargetLoweringObjectFileELF::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, - StringRef SectionPrefix) const { + StringRef SectionSuffix) const { // TODO: Share code between this function and // MCObjectInfo::initELFMCObjectFileInfo. - if (SectionPrefix.empty()) + if (SectionSuffix.empty()) return getSectionForConstant(DL, Kind, C, Alignment); auto &Context = getContext(); if (Kind.isMergeableConst4() && MergeableConst4Section) - return Context.getELFSection(".rodata.cst4." + SectionPrefix, + return Context.getELFSection(".rodata.cst4." + SectionSuffix, ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 4); if (Kind.isMergeableConst8() && MergeableConst8Section) - return Context.getELFSection(".rodata.cst8." + SectionPrefix, + return Context.getELFSection(".rodata.cst8." + SectionSuffix, ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 8); if (Kind.isMergeableConst16() && MergeableConst16Section) - return Context.getELFSection(".rodata.cst16." + SectionPrefix, + return Context.getELFSection(".rodata.cst16." + SectionSuffix, ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 16); if (Kind.isMergeableConst32() && MergeableConst32Section) - return Context.getELFSection(".rodata.cst32." + SectionPrefix, + return Context.getELFSection(".rodata.cst32." + SectionSuffix, ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 32); if (Kind.isReadOnly()) - return Context.getELFSection(".rodata" + SectionPrefix, ELF::SHT_PROGBITS, + return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return Context.getELFSection(".data.rel.ro" + SectionPrefix, + return Context.getELFSection(".data.rel.ro." 
+ SectionSuffix, ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE); } >From 4f91e5c74afbe35efface1031ad8ae75c7fabe1e Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Wed, 26 Mar 2025 11:25:19 -0700 Subject: [PATCH 3/4] resolve comments --- .../AArch64/constant-pool-partition.ll | 133 +++++++++++------- .../CodeGen/X86/constant-pool-partition.ll | 88 +++++++----- 2 files changed, 131 insertions(+), 90 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll index 5d2df59d34317..74b3632f39a7e 100644 --- a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll +++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll @@ -14,56 +14,84 @@ ; secondly, and then hot_func. Specifically, tests that ; - If a constant is accessed by hot functions, all constant pools for this ; constant (e.g., from an unprofiled function, or cold function) should have -; `.hot` suffix. +; `.hot` suffix. For instance, double 0.68 is used by both @cold_func and +; @hot_func, so both constant pool entries emitted for it (labels LCPI0_0 +; and LCPI2_0) get the `.hot` suffix. ; - Similarly if a constant is accessed by both cold function and un-profiled ; function, constant pools for this constant should not have `.unlikely` suffix. 
-; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 -; CHECK: .LCPI0_0: -; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 -; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8 -; CHECK: .LCPI0_1: -; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005 -; CHECK: .section .rodata.cst8,"aM",@progbits,8 -; CHECK: .LCPI0_2: -; CHECK: .byte 0 // 0x0 -; CHECK: .byte 4 // 0x4 -; CHECK: .byte 8 // 0x8 -; CHECK: .byte 12 // 0xc -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff - -; CHECK: .section .rodata.cst8,"aM",@progbits,8 -; CHECK: .LCPI1_0: -; CHECK: .byte 0 // 0x0 -; CHECK: .byte 4 // 0x4 -; CHECK: .byte 8 // 0x8 -; CHECK: .byte 12 // 0xc -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 -; CHECK: .LCPI1_1: -; CHECK: .word 442 // 0x1ba -; CHECK: .word 100 // 0x64 -; CHECK: .word 0 // 0x0 -; CHECK: .word 0 // 0x0 - +;; Constant pools for function @cold_func. 
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005 +; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 8 // 0x8 +; CHECK-NEXT: .byte 12 // 0xc +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff + +;; Constant pools for function @unprofiled_func +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 8 // 0x8 +; CHECK-NEXT: .byte 12 // 0xc +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .word 2 // 0x2 +; CHECK-NEXT: .word 3 // 0x3 +; CHECK-NEXT: .word 5 // 0x5 +; CHECK-NEXT: .word 7 // 0x7 +; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_2: +; CHECK-NEXT: .word 442 // 0x1ba +; CHECK-NEXT: .word 100 // 0x64 +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 0 // 0x0 + +;; Constant pools for function @hot_func ; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 -; CHECK: .LCPI2_0: -; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 -; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 -; CHECK: .LCPI2_1: -; CHECK: .word 442 // 0x1ba -; CHECK: .word 100 // 0x64 -; CHECK: .word 0 // 0x0 -; CHECK: .word 0 // 0x0 - -; CHECK: .section .rodata.cst32,"aM",@progbits,32 -; 
CHECK: .globl val +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI2_1: +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 100 // 0x64 +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 442 // 0x1ba +; CHECK-NEXT: .LCPI2_2: +; CHECK-NEXT: .word 442 // 0x1ba +; CHECK-NEXT: .word 100 // 0x64 +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 0 // 0x0 + +;; For global variable @val +;; The section name remains `.rodata.cst32` without a hotness suffix because +;; the variable has external linkage and is not analyzed. The compiler needs +;; symbolized data access profiles to annotate such global variables' hotness. +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK-NEXT: .globl val define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 { %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) @@ -83,14 +111,16 @@ define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) { %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) %t2 = bitcast <8 x i8> %t1 to <4 x i16> %t3 = zext <4 x i16> %t2 to <4 x i32> - %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3 + %t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7> + %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t4 ret <4 x i1> %cmp } define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { %2 = tail call i32 (...) 
@func_taking_arbitrary_param(double 6.800000e-01) - %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0> - ret <4 x i1> %b + %b = add <4 x i32> <i32 0, i32 100, i32 0, i32 442>, %a + %c = icmp ule <4 x i32> %b, <i32 442, i32 100, i32 0, i32 0> + ret <4 x i1> %c } @val = unnamed_addr constant i256 1 @@ -107,14 +137,15 @@ define i32 @main(i32 %0, ptr %1) !prof !16 { 7: ; preds = %7, %2 %8 = phi i32 [ 0, %2 ], [ %10, %7 ] - %9 = call i32 @rand() + %seed_val = load i256, ptr @val + %9 = call i32 @seed(i256 %seed_val) call void @hot_func(i32 %9) %10 = add i32 %8, 1 %11 = icmp eq i32 %10, 100000 br i1 %11, label %5, label %7, !prof !18 } -declare i32 @rand() +declare i32 @seed(i256) declare double @double_func() declare <4 x i32> @vector_func() declare <16 x i8> @vector_func_16i8() diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll index e39a5d2026dd7..a1f16896a6094 100644 --- a/llvm/test/CodeGen/X86/constant-pool-partition.ll +++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll @@ -25,42 +25,52 @@ target triple = "x86_64-grtev4-linux-gnu" ; RUN: -unique-section-names=false \ ; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always -; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 -; CHECK: .LCPI0_0: -; CHECK: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 -; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8 -; CHECK: .LCPI0_1: -; CHECK: .quad 0x3eb0000000000000 # double 9.5367431640625E-7 - -; CHECK: .section .rodata.cst8,"aM",@progbits,8 -; CHECK: .LCPI0_2: -; CHECK: .quad 0x3fc0000000000000 # double 0.125 - -; CHECK: .section .rodata.cst8,"aM",@progbits,8 -; CHECK: .LCPI1_0: -; CHECK: .quad 0x3fc0000000000000 # double 0.125 - -; CHECK: .section .rodata.cst4,"aM",@progbits,4 -; CHECK: .LCPI2_0: -; CHECK: .long 0x3e000000 # float 0.125 - -; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 -; CHECK: .LCPI3_0: -; CHECK: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 -; CHECK: 
.section .rodata.cst16.hot,"aM",@progbits,16 -; CHECK: .LCPI3_1: -; CHECK: .long 2147483648 # 0x80000000 -; CHECK: .long 2147483648 # 0x80000000 -; CHECK: .long 2147483648 # 0x80000000 -; CHECK: .long 2147483648 # 0x80000000 -; CHECK: .LCPI3_2: -; CHECK: .long 2147484090 # 0x800001ba -; CHECK: .long 2147483748 # 0x80000064 -; CHECK: .long 2147483648 # 0x80000000 -; CHECK: .long 2147483648 # 0x80000000 - -; CHECK: .section .rodata.cst32,"aM",@progbits,32 -; CHECK: .globl val +;; For function @cold_func +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .quad 0x3eb0000000000000 # double 9.5367431640625E-7 +; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .quad 0x3fc0000000000000 # double 0.125 + +;; For function @unprofiled_func_double +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .quad 0x3fc0000000000000 # double 0.125 + +;; For function @unprofiled_func_float +; CHECK: .section .rodata.cst4,"aM",@progbits,4 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .long 0x3e000000 # float 0.125 + +;; For function @hot_func +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI3_0: +; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI3_1: +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .LCPI3_2: +; CHECK-NEXT: .long 2147484090 # 0x800001ba +; CHECK-NEXT: .long 2147483748 # 
0x80000064 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 + +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK-NEXT: .globl val define double @cold_func(double %x) !prof !16 { %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) @@ -79,7 +89,6 @@ define float @unprofiled_func_float(float %x) { ret float %z } - define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0> @@ -98,14 +107,15 @@ define i32 @main(i32 %0, ptr %1) !prof !16 { 7: ; preds = %7, %2 %8 = phi i32 [ 0, %2 ], [ %10, %7 ] - %9 = call i32 @rand() + %seed_val = load i256, ptr @val + %9 = call i32 @seed(i256 %seed_val) call void @hot_func(i32 %9) %10 = add i32 %8, 1 %11 = icmp eq i32 %10, 100000 br i1 %11, label %5, label %7, !prof !18 } -declare i32 @rand() +declare i32 @seed(i256) declare double @double_func() declare i32 @func_taking_arbitrary_param(...) >From 99cd5317d963ffa312bd13247e64854ee32c9454 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Wed, 26 Mar 2025 13:11:05 -0700 Subject: [PATCH 4/4] clang-format --- llvm/lib/CodeGen/StaticDataSplitter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index f6d9c55952c52..9ed5d33fd2524 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -58,10 +58,9 @@ class StaticDataSplitter : public MachineFunctionPass { // .data.rel.ro} sections. bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM); - // Returns the constant if the operand refers to a global variable or constant + // Returns the constant if the operand refers to a global variable or constant // that gets lowered to static data sections. Otherwise, return nullptr. 
- const Constant *getConstant(const MachineOperand &Op, - const TargetMachine &TM, + const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM, const MachineConstantPool *MCP); // Use profiles to partition static data. @@ -247,7 +246,8 @@ void StaticDataSplitter::annotateStaticDataWithoutProfiles( for (const auto &MBB : MF) for (const MachineInstr &I : MBB) for (const MachineOperand &Op : I.operands()) - if (const Constant *C = getConstant(Op, MF.getTarget(), MF.getConstantPool())) + if (const Constant *C = + getConstant(Op, MF.getTarget(), MF.getConstantPool())) SDPI->addConstantProfileCount(C, std::nullopt); } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits