================ @@ -0,0 +1,141 @@ +; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Repeat the RUN command above for big-endian systems. +; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Tests that constant pool hotness is aggregated across the module. The +; static-data-splitter processes data from cold_func first, unprofiled_func +; secondly, and then hot_func. Specifically, tests that +; - If a constant is accessed by hot functions, all constant pools for this +; constant (e.g., from an unprofiled function, or cold function) should have +; `.hot` suffix. +; - Similarly if a constant is accessed by both cold function and un-profiled +; function, constant pools for this constant should not have `.unlikely` suffix. 
+ +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI0_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK: .LCPI0_1: +; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005 +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI0_2: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff + +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: .LCPI1_0: +; CHECK: .byte 0 // 0x0 +; CHECK: .byte 4 // 0x4 +; CHECK: .byte 8 // 0x8 +; CHECK: .byte 12 // 0xc +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .byte 255 // 0xff +; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI1_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .LCPI2_0: +; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK: .LCPI2_1: +; CHECK: .word 442 // 0x1ba +; CHECK: .word 100 // 0x64 +; CHECK: .word 0 // 0x0 +; CHECK: .word 0 // 0x0 + +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK: .globl val + +define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) + %num = tail call i32 (...) 
@func_taking_arbitrary_param(double 6.8500000e-01) + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) + %t2 = bitcast <8 x i8> %t1 to <2 x i32> + %3 = extractelement <2 x i32> %t2, i32 1 + %sum = add i32 %2, %3 + %ret = add i32 %sum, %num + ret i32 %ret +} + +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) +declare i32 @func_taking_arbitrary_param(...) + +define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) { + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) + %t2 = bitcast <8 x i8> %t1 to <4 x i16> + %t3 = zext <4 x i16> %t2 to <4 x i32> + %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3 + ret <4 x i1> %cmp +} + +define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) + %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0> + ret <4 x i1> %b +} + +@val = unnamed_addr constant i256 1 ---------------- mingmingl-llvm wrote:
Updated the test case to use this `@val` in a hot basic block in `@main`. The section suffix remains empty (neither `.hot` nor `.unlikely`), though, because `@val` has external linkage and the static-data-splitter pass only analyzes local-linkage variables. For such external-linkage global variables, the compiler can safely assign the `.hot` suffix if they are used by a hot block, but it should not assign the `.unlikely` suffix when they merely appear cold in this module, because other modules may still access them from hot code. Given that the plan is to map `.<section>.*.hot` and `.<section>` input sections to `.<section>`, and to map `.<section>.*.unlikely` input sections to `.<section>.unlikely`, it's fine not to assign `.hot` here. Once symbolized data access profiles are provided, the compiler can use them to distinguish hot and cold global variables without whole-program analysis. https://github.com/llvm/llvm-project/pull/129781 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits