Issue |
138019
|
Summary |
arm64 Linux kernel boot failure after b326cb6792b3951881d63d5a02ea163921da18d9
|
Labels |
new issue
|
Assignees |
|
Reporter |
nathanchance
|
After https://github.com/llvm/llvm-project/commit/b326cb6792b3951881d63d5a02ea163921da18d9, arm64 Linux kernel builds fail to boot on certain platforms ([initially reported on LKML](https://lore.kernel.org/CA+G9fYve7+nXJNoV48TksXoMeVjgJuP8Gs=+1br+qur1dpw...@mail.gmail.com/), [downstream report](https://github.com/ClangBuiltLinux/linux/issues/2082)). This happens early in boot (before the serial driver is available), and the only platform I have access to is an FVP, so my debug options are limited. Even so, I was able to narrow this hang to a change in `create_init_idmap()` in [`arch/arm64/kernel/pi/map_range.c`](https://elixir.bootlin.com/linux/v6.15-rc4/source/arch/arm64/kernel/pi/map_range.c#L93) with the help of [a hack patch to undo that change dynamically](https://gist.github.com/nathanchance/6a219cb868a3c6098debb82896d82612).
[map_range.i.txt](https://github.com/user-attachments/files/19983705/map_range.i.txt)
[map_range.unopt.ll.txt](https://github.com/user-attachments/files/19983706/map_range.unopt.ll.txt)
[good-map_range.opt.ll.txt](https://github.com/user-attachments/files/19983708/good-map_range.opt.ll.txt)
[bad-map_range.opt.ll.txt](https://github.com/user-attachments/files/19983709/bad-map_range.opt.ll.txt)
The diff of the optimized IR:
```diff
diff --git a/good-map_range.opt.ll.txt b/bad-map_range.opt.ll.txt
index 9eef147..e98844b 100644
--- a/good-map_range.opt.ll.txt
+++ b/bad-map_range.opt.ll.txt
@@ -141,23 +141,24 @@ entry:
%add = add i64 %0, 4096
store i64 %add, ptr %ptep, align 8
%1 = load i8, ptr @arm64_use_ng_mappings, align 1, !range !11, !noundef !12
- %loadedv = trunc nuw i8 %1 to i1
- %or = select i1 %loadedv, i64 2051, i64 3
- %2 = call i64 asm "mrs $0, tcr_el1", "=r"() #3, !srcloc !8
- %and.i = and i64 %2, 576460752303423488
+ %2 = zext nneg i8 %1 to i64
+ %cond = shl nuw nsw i64 %2, 11
+ %3 = call i64 asm "mrs $0, tcr_el1", "=r"() #3, !srcloc !8
+ %and.i = and i64 %3, 576460752303423488
%tobool.i.not = icmp eq i64 %and.i, 0
%cond1 = select i1 %tobool.i.not, i64 768, i64 0
- %or2 = or disjoint i64 %cond1, %or
- %or6 = or disjoint i64 %or2, 29273397577909248
- %or8 = or disjoint i64 %or2, 18014398509483136
+ %or = or disjoint i64 %cond1, %cond
+ %or6 = or disjoint i64 %or, 29273397577909251
+ %and = and i64 %or6, 18014398509485827
+ %or8 = or disjoint i64 %and, 128
%not = xor i64 %clrmask, -1
%and22 = and i64 %or8, %not
%and25 = and i64 %or6, %not
call void @map_range(ptr noundef nonnull %ptep, i64 noundef ptrtoint (ptr @_stext to i64), i64 noundef ptrtoint (ptr @__initdata_begin to i64), i64 noundef ptrtoint (ptr @_stext to i64), i64 %and22, i32 noundef 0, ptr noundef %pg_dir, i1 noundef false, i64 noundef 0) #4
call void @map_range(ptr noundef nonnull %ptep, i64 noundef ptrtoint (ptr @__initdata_begin to i64), i64 noundef ptrtoint (ptr @_end to i64), i64 noundef ptrtoint (ptr @__initdata_begin to i64), i64 %and25, i32 noundef 0, ptr noundef %pg_dir, i1 noundef false, i64 noundef 0) #4
- %3 = load i64, ptr %ptep, align 8
+ %4 = load i64, ptr %ptep, align 8
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ptep) #5
- ret i64 %3
+ ret i64 %4
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
```
The object file diff:
```diff
diff --git a/tmp/.psub.9lwVS5WRhB b/tmp/.psub.RgCvbpYhdV
index 7c4d9d9..c046c52 100644
--- a/tmp/.psub.9lwVS5WRhB
+++ b/tmp/.psub.RgCvbpYhdV
@@ -1,5 +1,5 @@
-build/linux/good/arch/arm64/kernel/pi/map_range.o: file format elf64-littleaarch64
+build/linux/bad/arch/arm64/kernel/pi/map_range.o: file format elf64-littleaarch64
Disassembly of section .init.text:
@@ -128,52 +128,47 @@ create_init_idmap>:
d10103ff sub sp, sp, #0x40
90000008 adrp x8, 0x0 <map_range>
00000000000001dc: R_AARCH64_ADR_PREL_PG_HI21 arm64_use_ng_mappings
- 52810069 mov w9, #0x803 // =2051
- d538204a mrs x10, TCR_EL1
+ d5382049 mrs x9, TCR_EL1
+ f245013f tst x9, #0x800000000000000
39400108 ldrb w8, [x8]
00000000000001e8: R_AARCH64_LDST8_ABS_LO12_NC arm64_use_ng_mappings
- d280900b mov x11, #0x480 // =1152
+ 52806009 mov w9, #0x300 // =768
a90257f6 stp x22, x21, [sp, #0x20]
- f2e0080b movk x11, #0x40, lsl #48
+ 9a9f0129 csel x9, x9, xzr, eq
a9034ff4 stp x20, x19, [sp, #0x30]
aa0103f3 mov x19, x1
- 7100011f cmp w8, #0x0
- 52800068 mov w8, #0x3 // =3
+ aa082d28 orr x8, x9, x8, lsl #11
+ d2808069 mov x9, #0x403 // =1027
aa0003f4 mov x20, x0
- 9a881128 csel x8, x9, x8, ne
- f245015f tst x10, #0x800000000000000
- 52806009 mov w9, #0x300 // =768
- 9a9f0129 csel x9, x9, xzr, eq
+ f2e00d09 movk x9, #0x68, lsl #48
90000015 adrp x21, 0x0 <map_range>
- 000000000000021c: R_AARCH64_ADR_PREL_PG_HI21 __initdata_begin
+ 0000000000000210: R_AARCH64_ADR_PREL_PG_HI21 __initdata_begin
910002b5 add x21, x21, #0x0
- 0000000000000220: R_AARCH64_ADD_ABS_LO12_NC __initdata_begin
- aa080128 orr x8, x9, x8
- d2808009 mov x9, #0x400 // =1024
+ 0000000000000214: R_AARCH64_ADD_ABS_LO12_NC __initdata_begin
+ aa090116 orr x22, x8, x9
+ 91400408 add x8, x0, #0x1, lsl #12 // =0x1000
90000001 adrp x1, 0x0 <map_range>
- 000000000000022c: R_AARCH64_ADR_PREL_PG_HI21 _stext
+ 0000000000000220: R_AARCH64_ADR_PREL_PG_HI21 _stext
91000021 add x1, x1, #0x0
- 0000000000000230: R_AARCH64_ADD_ABS_LO12_NC _stext
- f2e00d09 movk x9, #0x68, lsl #48
- 9140040a add x10, x0, #0x1, lsl #12 // =0x1000
- aa090116 orr x22, x8, x9
- aa0b0108 orr x8, x8, x11
+ 0000000000000224: R_AARCH64_ADD_ABS_LO12_NC _stext
+ 924af2c9 and x9, x22, #0xffc7ffffffffffff
+ a90023ff stp xzr, x8, [sp]
+ b2790128 orr x8, x9, #0x80
910023e0 add x0, sp, #0x8
- 8a330104 bic x4, x8, x19
aa1503e2 mov x2, x21
+ 8a330104 bic x4, x8, x19
aa0103e3 mov x3, x1
2a1f03e5 mov w5, wzr
aa1403e6 mov x6, x20
2a1f03e7 mov w7, wzr
a9017bfd stp x29, x30, [sp, #0x10]
910043fd add x29, sp, #0x10
- a9002bff stp xzr, x10, [sp]
- 94000000 bl 0x26c <create_init_idmap+0x94>
- 000000000000026c: R_AARCH64_CALL26 map_range
+ 94000000 bl 0x258 <create_init_idmap+0x80>
+ 0000000000000258: R_AARCH64_CALL26 map_range
90000002 adrp x2, 0x0 <map_range>
- 0000000000000270: R_AARCH64_ADR_PREL_PG_HI21 _end
+ 000000000000025c: R_AARCH64_ADR_PREL_PG_HI21 _end
91000042 add x2, x2, #0x0
- 0000000000000274: R_AARCH64_ADD_ABS_LO12_NC _end
+ 0000000000000260: R_AARCH64_ADD_ABS_LO12_NC _end
910023e0 add x0, sp, #0x8
8a3302c4 bic x4, x22, x19
aa1503e1 mov x1, x21
@@ -182,8 +177,8 @@ create_init_idmap>:
aa1403e6 mov x6, x20
2a1f03e7 mov w7, wzr
f90003ff str xzr, [sp]
- 94000000 bl 0x298 <create_init_idmap+0xc0>
- 0000000000000298: R_AARCH64_CALL26 map_range
+ 94000000 bl 0x284 <create_init_idmap+0xac>
+ 0000000000000284: R_AARCH64_CALL26 map_range
a9434ff4 ldp x20, x19, [sp, #0x30]
f94007e0 ldr x0, [sp, #0x8]
a94257f6 ldp x22, x21, [sp, #0x20]
```
If I replace `arch/arm64/kernel/pi/map_range.o` with the output of `llc --filetype obj` run on either the good optimized IR or the unoptimized IR, the kernel boots fine. As soon as I use the bad optimized IR, the kernel hangs. My initial suspicion is that `PAGE_KERNEL_ROX` is not getting optimized correctly, but my assembly knowledge is not very good, so I might be misunderstanding something.
If necessary, this is the command to generate the `.o` from the `.i`:
```
clang --target=aarch64-linux-gnu -fintegrated-as -mlittle-endian -std=gnu11 -fshort-wchar -funsigned-char -fno-common -fno-PIE -fno-strict-aliasing -mgeneral-regs-only -Wno-psabi -fno-asynchronous-unwind-tables -fno-unwind-tables -mbranch-protection=pac-ret+bti -Wa,-march=armv8.5-a -fno-delete-null-pointer-checks -O2 -fstack-protector-strong -fno-omit-frame-pointer -fno-optimize-sibling-calls -ftrivial-auto-var-init=zero -fno-stack-clash-protection -falign-functions=4 -fstrict-flex-arrays=3 -fno-strict-overflow -fno-stack-check -fno-builtin-wcslen -Wall -Wundef -Werror=implicit-function-declaration -Werror=implicit-int -Werror=return-type -Werror=strict-prototypes -Wno-format-security -Wno-trigraphs -Wno-frame-address -Wno-address-of-packed-member -Wmissing-declarations -Wmissing-prototypes -Wframe-larger-than=2048 -Wno-gnu -Wno-format-overflow-non-kprintf -Wno-format-truncation-non-kprintf -Wvla -Wno-pointer-sign -Wcast-function-type -Wimplicit-fallthrough -Werror=date-time -Werror=incompatible-pointer-types -Wenum-conversion -Wextra -Wunused -Wno-unused-but-set-variable -Wno-unused-const-variable -Wno-format-overflow -Wno-override-init -Wno-pointer-to-enum-cast -Wno-tautological-constant-out-of-range-compare -Wno-unaligned-access -Wno-enum-compare-conditional -Wno-missing-field-initializers -Wno-type-limits -Wno-shift-negative-value -Wno-enum-enum-conversion -Wno-sign-compare -Wno-unused-parameter -mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=1240 -fpie -Os -mbranch-protection=none -fno-stack-protector -ffreestanding -fno-asynchronous-unwind-tables -fno-unwind-tables -fno-addrsig -mstrict-align -c map_range.i
```
Please feel free to give this issue a more descriptive title if necessary.
cc @andjo403 @nikic @dtcxzyw
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs