https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/125993
>From 169486b930c12eb7521fe7ce3c3e6f360824af22 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Wed, 5 Feb 2025 18:46:36 -0800 Subject: [PATCH 1/2] [nfc]asm printer jump table --- llvm/include/llvm/CodeGen/AsmPrinter.h | 5 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 52 ++-- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 15 +- .../CodeGen/AArch64/jump-table-partition.ll | 252 ++++++++++++++++++ 4 files changed, 285 insertions(+), 39 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/jump-table-partition.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 3da63af5ba5716c..9ef9888af990c63 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -893,9 +893,8 @@ class AsmPrinter : public MachineFunctionPass { // Internal Implementation Details //===------------------------------------------------------------------===// - void emitJumpTableImpl(const MachineJumpTableInfo &MJTI, - ArrayRef<unsigned> JumpTableIndices, - bool JTInDiffSection); + virtual void emitJumpTableImpl(const MachineJumpTableInfo &MJTI, + ArrayRef<unsigned> JumpTableIndices); void emitJumpTableEntry(const MachineJumpTableInfo &MJTI, const MachineBasicBlock *MBB, unsigned uid) const; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 44b10c3ef997267..c07755b269c95cc 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2855,22 +2855,12 @@ void AsmPrinter::emitConstantPool() { void AsmPrinter::emitJumpTableInfo() { const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; - if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; - // Pick the directive to use to print the jump table entries, and switch to - // the appropriate section. - const Function &F = MF->getFunction(); - const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, - F); - if (!TM.Options.EnableStaticDataPartitioning) { - emitJumpTableImpl(*MJTI, llvm::to_vector(llvm::seq<unsigned>(JT.size())), - JTInDiffSection); + emitJumpTableImpl(*MJTI, llvm::to_vector(llvm::seq<unsigned>(JT.size()))); return; } @@ -2886,38 +2876,46 @@ void AsmPrinter::emitJumpTableInfo() { } } - emitJumpTableImpl(*MJTI, HotJumpTableIndices, JTInDiffSection); - emitJumpTableImpl(*MJTI, ColdJumpTableIndices, JTInDiffSection); + emitJumpTableImpl(*MJTI, HotJumpTableIndices); + emitJumpTableImpl(*MJTI, ColdJumpTableIndices); } void AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI, - ArrayRef<unsigned> JumpTableIndices, - bool JTInDiffSection) { - if (JumpTableIndices.empty()) + ArrayRef<unsigned> JumpTableIndices) { + if (MJTI.getEntryKind() == MachineJumpTableInfo::EK_Inline || + JumpTableIndices.empty()) return; const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const Function &F = MF->getFunction(); const std::vector<MachineJumpTableEntry> &JT = MJTI.getJumpTables(); MCSection *JumpTableSection = nullptr; - if (TM.Options.EnableStaticDataPartitioning) { - JumpTableSection = - TLOF.getSectionForJumpTable(F, TM, &JT[JumpTableIndices.front()]); - } else { - JumpTableSection = TLOF.getSectionForJumpTable(F, TM); - } - const DataLayout &DL = MF->getDataLayout(); + // Pick the directive to use to print the jump table entries, and switch to + // the appropriate section. + const bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( + MJTI.getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || + MJTI.getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64, + F); if (JTInDiffSection) { + if (TM.Options.EnableStaticDataPartitioning) { + JumpTableSection = + TLOF.getSectionForJumpTable(F, TM, &JT[JumpTableIndices.front()]); + } else { + JumpTableSection = TLOF.getSectionForJumpTable(F, TM); + } OutStreamer->switchSection(JumpTableSection); } + const DataLayout &DL = MF->getDataLayout(); + emitAlignment(Align(MJTI.getEntryAlignment(MF->getDataLayout()))); - // Jump tables in code sections are marked with a data_region directive - // where that's supported. - if (!JTInDiffSection) + if (!JTInDiffSection) { + // Jump tables in code sections are marked with a data_region directive + // where that's supported. OutStreamer->emitDataRegion(MCDR_DataRegionJT32); + } for (const unsigned JumpTableIndex : JumpTableIndices) { ArrayRef<MachineBasicBlock *> JTBBs = JT[JumpTableIndex].MBBs; diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index f1f25b65fc53fac..dbbd49160160b6b 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -112,7 +112,8 @@ class AArch64AsmPrinter : public AsmPrinter { const MCExpr *lowerBlockAddressConstant(const BlockAddress &BA) override; void emitStartOfAsmFile(Module &M) override; - void emitJumpTableInfo() override; + void emitJumpTableImpl(const MachineJumpTableInfo &MJTI, + ArrayRef<unsigned> JumpTableIndices) override; std::tuple<const MCSymbol *, uint64_t, const MCSymbol *, codeview::JumpTableEntrySize> getCodeViewJumpTableInfo(int JTI, const MachineInstr *BranchInstr, @@ -1273,19 +1274,15 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, printOperand(MI, NOps - 2, OS); } -void AArch64AsmPrinter::emitJumpTableInfo() { - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (!MJTI) return; - - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - if (JT.empty()) return; - +void AArch64AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI, + ArrayRef<unsigned> JumpTableIndices) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM); OutStreamer->switchSection(ReadOnlySec); + const std::vector<MachineJumpTableEntry> &JT = MJTI.getJumpTables(); auto AFI = MF->getInfo<AArch64FunctionInfo>(); - for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { + for (unsigned JTI : JumpTableIndices) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; // If this jump table was deleted, ignore it. diff --git a/llvm/test/CodeGen/AArch64/jump-table-partition.ll b/llvm/test/CodeGen/AArch64/jump-table-partition.ll new file mode 100644 index 000000000000000..1b1292cec9ab00d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/jump-table-partition.ll @@ -0,0 +1,252 @@ + + +; The llc commands override two options +; - 'aarch64-enable-atomic-cfg-tidy' to false to turn off simplifycfg pass, +; which can simplify away switch instructions before isel lowers switch instructions. +; - 'aarch64-min-jump-table-entries' so 'switch' needs fewer cases to generate +; a jump table. + +; The static-data-splitter pass doesn't run. +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -function-sections=true \ +; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \ +; RUN: -unique-section-names=true %s -o - 2>&1 | FileCheck %s --check-prefixes=DEFAULT + +; DEFAULT: .section .rodata.hot.foo,"a",@progbits +; DEFAULT: .LJTI0_0: +; DEFAULT: .LJTI0_1: +; DEFAULT: .LJTI0_2: +; DEFAULT: .LJTI0_3: +; DEFAULT: .section .rodata.func_without_profile,"a",@progbits +; DEFAULT: .LJTI1_0: +; DEFAULT: .section .rodata.bar_prefix.bar,"a",@progbits +; DEFAULT: .LJTI2_0 + +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \ +; RUN: -unique-section-names=false %s -o - 2>&1 | FileCheck %s --check-prefixes=NUM,JT + +; Section names will optionally have `.<func>` if -function-sections is enabled. +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=FUNC,JT + +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=false \ +; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=FUNCLESS,JT + +; In function @foo, the 2 switch instructions to jt0.* and jt1.* are placed in +; hot-prefixed sections, and the 2 switch instructions to jt2.* and jt3.* are +; placed in cold-prefixed sections. +; NUM: .section .rodata.hot.,"a",@progbits,unique,2 +; FUNC: .section .rodata.hot.foo,"a",@progbits +; FUNCLESS: .section .rodata.hot.,"a",@progbits +; JT: .LJTI0_0: +; JT: .LJTI0_2: +; NUM: .section .rodata.hot.,"a",@progbits,unique,3 +; FUNC-NOT: .section .rodata.hot.foo +; FUNCLESS-NOT: .section .rodata.hot.,"a",@progbits +; JT: .LJTI0_1: +; JT: .LJTI0_3: + +; @func_without_profile simulates the functions without profile information +; (e.g., not instrumented or not profiled), its jump tables are placed in +; sections without hot or unlikely prefixes. +; NUM: .section .rodata,"a",@progbits,unique,5 +; FUNC: .section .rodata.func_without_profile,"a",@progbits +; FUNCLESS: .section .rodata,"a",@progbits +; JT: .LJTI1_0: + +; @bar doesn't have profile information and it has a section prefix. +; Tests that its jump tables are placed in sections with function prefixes. +; NUM: .section .rodata.bar_prefix.,"a",@progbits,unique, +; FUNC: .section .rodata.bar_prefix.bar +; FUNCLESS: .section .rodata.bar_prefix.,"a" +; JT: .LJTI2_0 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@str.9 = private constant [7 x i8] c".str.9\00" +@str.10 = private constant [8 x i8] c".str.10\00" +@str.11 = private constant [8 x i8] c".str.11\00" + +@case2 = private constant [7 x i8] c"case 2\00" +@case1 = private constant [7 x i8] c"case 1\00" +@default = private constant [8 x i8] c"default\00" +@jt3 = private constant [4 x i8] c"jt3\00" + +; jt0 and jt2 are hot. jt1 and jt3 are cold. +define i32 @foo(i32 %num) !prof !13 { +entry: + %mod3 = sdiv i32 %num, 3 + switch i32 %mod3, label %jt0.default [ + i32 1, label %jt0.bb1 + i32 2, label %jt0.bb2 + ], !prof !14 + +jt0.bb1: + call i32 @puts(ptr @case1) + br label %jt0.epilog + +jt0.bb2: + call i32 @puts(ptr @case2) + br label %jt0.epilog + +jt0.default: + call i32 @puts(ptr @default) + br label %jt0.epilog + +jt0.epilog: + %zero = icmp eq i32 %num, 0 + br i1 %zero, label %hot, label %cold, !prof !17 + +hot: + %c2 = call i32 @transform(i32 %num) + switch i32 %c2, label %jt2.default [ + i32 1, label %jt2.bb1 + i32 2, label %jt2.bb2 + ], !prof !14 + +jt2.bb1: + call i32 @puts(ptr @case1) + br label %jt1.epilog + +jt2.bb2: + call i32 @puts(ptr @case2) + br label %jt1.epilog + +jt2.default: + call i32 @puts(ptr @default) + br label %jt2.epilog + +jt2.epilog: + %c2cmp = icmp ne i32 %c2, 0 + br i1 %c2cmp, label %return, label %jt3.prologue, !prof !18 + +cold: + %c1 = call i32 @compute(i32 %num) + switch i32 %c1, label %jt1.default [ + i32 1, label %jt1.bb1 + i32 2, label %jt1.bb2 + ], !prof !14 + +jt1.bb1: + call i32 @puts(ptr @case1) + br label %jt1.epilog + +jt1.bb2: + call i32 @puts(ptr @case2) + br label %jt1.epilog + +jt1.default: + call i32 @puts(ptr @default) + br label %jt1.epilog + +jt1.epilog: + br label %return + +jt3.prologue: + %c3 = call i32 @cleanup(i32 %num) + switch i32 %c3, label %jt3.default [ + i32 1, label %jt3.bb1 + i32 2, label %jt3.bb2 + ], !prof !14 + +jt3.bb1: + call i32 @puts(ptr @case1) + br label %jt3.epilog + +jt3.bb2: + call i32 @puts(ptr @case2) + br label %jt3.epilog + +jt3.default: + call i32 @puts(ptr @default) + br label %jt3.epilog + +jt3.epilog: + call i32 @puts(ptr @jt3) + br label %return + +return: + ret i32 %mod3 +} + +define void @func_without_profile(i32 %num) { +entry: + switch i32 %num, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + ] + +sw.bb: + call i32 @puts(ptr @str.10) + br label %sw.epilog + +sw.bb1: + call i32 @puts(ptr @str.9) + br label %sw.epilog + +sw.default: + call i32 @puts(ptr @str.11) + br label %sw.epilog + +sw.epilog: + ret void +} + +define void @bar(i32 %num) !section_prefix !20 { +entry: + switch i32 %num, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + ] + +sw.bb: + call i32 @puts(ptr @str.10) + br label %sw.epilog + +sw.bb1: + call i32 @puts(ptr @str.9) + br label %sw.epilog + +sw.default: + call i32 @puts(ptr @str.11) + br label %sw.epilog + +sw.epilog: + ret void +} + +declare i32 @puts(ptr) +declare i32 @printf(ptr, ...) +declare i32 @compute(i32) +declare i32 @transform(i32) +declare i32 @cleanup(i32) + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 230002} +!4 = !{!"MaxCount", i64 100000} +!5 = !{!"MaxInternalCount", i64 50000} +!6 = !{!"MaxFunctionCount", i64 100000} +!7 = !{!"NumCounts", i64 14} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12} +!11 = !{i32 990000, i64 10000, i32 7} +!12 = !{i32 999999, i64 1, i32 9} +!13 = !{!"function_entry_count", i64 100000} +!14 = !{!"branch_weights", i32 60000, i32 20000, i32 20000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 0, i32 0, i32 0, i32 0, i32 0} +!17 = !{!"branch_weights", i32 99999, i32 1} +!18 = !{!"branch_weights", i32 99998, i32 1} +!19 = !{!"branch_weights", i32 97000, i32 1000, i32 1000, i32 1000} +!20 = !{!"function_section_prefix", !"bar_prefix"} >From bbec612436decac5a87fdc1cb4789e0458339004 Mon Sep 17 00:00:00 2001 From: mingmingl <mingmi...@google.com> Date: Wed, 5 Feb 2025 22:32:37 -0800 Subject: [PATCH 2/2] Fast return in aarch64 impl --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 7 +++---- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 3 +++ .../test/CodeGen/AArch64/jump-table-partition.ll | 16 ++++++++-------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c07755b269c95cc..7a4b43379570a40 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2908,12 +2908,11 @@ void AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI, } const DataLayout &DL = MF->getDataLayout(); + emitAlignment(Align(MJTI.getEntryAlignment(DL))); - emitAlignment(Align(MJTI.getEntryAlignment(MF->getDataLayout()))); - + // Jump tables in code sections are marked with a data_region directive + // where that's supported. if (!JTInDiffSection) { - // Jump tables in code sections are marked with a data_region directive - // where that's supported. OutStreamer->emitDataRegion(MCDR_DataRegionJT32); } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index dbbd49160160b6b..c92c203f247954b 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1276,6 +1276,9 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, void AArch64AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI, ArrayRef<unsigned> JumpTableIndices) { + // Fast return if there is nothing to emit to avoid creating empty sections. + if (JumpTableIndices.empty()) + return; const TargetLoweringObjectFile &TLOF = getObjFileLowering(); MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM); OutStreamer->switchSection(ReadOnlySec); diff --git a/llvm/test/CodeGen/AArch64/jump-table-partition.ll b/llvm/test/CodeGen/AArch64/jump-table-partition.ll index 45fb3788993f21d..759f5359b17e14f 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-partition.ll +++ b/llvm/test/CodeGen/AArch64/jump-table-partition.ll @@ -37,12 +37,12 @@ ; A function's section prefix is used for all jump tables of this function. ; @foo is hot so its jump table data section has a hot prefix. -; NUM: .section .rodata.hot.,"a",@progbits,unique,2 +; NUM: .section .rodata.hot.,"a",@progbits,unique,2 ; FUNC: .section .rodata.hot.foo,"a",@progbits ; FUNCLESS: .section .rodata.hot.,"a",@progbits ; JT: .LJTI0_0: ; JT: .LJTI0_2: -; NUM: .section .rodata.hot.,"a",@progbits,unique,3 +; NUM: .section .rodata.hot.,"a",@progbits,unique,3 ; FUNC-NOT: .section .rodata.hot.foo ; FUNCLESS-NOT: .section .rodata.hot.,"a",@progbits ; JT: .LJTI0_1: @@ -50,16 +50,16 @@ ; func_without_profile doesn't have profiles, so its jumptable doesn't have ; hotness-based prefix. -; NUM: .section .rodata,"a",@progbits,unique, -; FUNC: .section .rodata.func_without_profile,"a",@progbits -; FUNCLESS: .section .rodata,"a",@progbits +; NUM: .section .rodata,"a",@progbits,unique,5 +; FUNC: .section .rodata.func_without_profile,"a",@progbits +; FUNCLESS: .section .rodata,"a",@progbits ; JT: .LJTI1_0: ; @bar doesn't have profile information and it has a section prefix. ; Tests that its jump tables are placed in sections with function prefixes. -; NUM: .section .rodata.bar_prefix.,"a",@progbits,unique, -; FUNC: .section .rodata.bar_prefix.bar -; FUNCLESS: .section .rodata.bar_prefix.,"a" +; NUM: .section .rodata.bar_prefix.,"a",@progbits,unique,7 +; FUNC: .section .rodata.bar_prefix.bar +; FUNCLESS: .section .rodata.bar_prefix.,"a" ; JT: .LJTI2_0 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits