Issue 138019
Summary arm64 Linux kernel boot failure after b326cb6792b3951881d63d5a02ea163921da18d9
Labels new issue
Assignees
Reporter nathanchance
    After https://github.com/llvm/llvm-project/commit/b326cb6792b3951881d63d5a02ea163921da18d9, arm64 Linux kernel builds fail to boot on certain platforms ([initially reported on LKML](https://lore.kernel.org/CA+G9fYve7+nXJNoV48TksXoMeVjgJuP8Gs=+1br+qur1dpw...@mail.gmail.com/), [downstream report](https://github.com/ClangBuiltLinux/linux/issues/2082)). The hang happens early in boot (before the serial driver is available), and the only platform I have access to is an FVP, so my debugging options are limited. Even so, I was able to narrow the hang down to a change in the code generated for `create_init_idmap()` in [`arch/arm64/kernel/pi/map_range.c`](https://elixir.bootlin.com/linux/v6.15-rc4/source/arch/arm64/kernel/pi/map_range.c#L93), with the help of [a hack patch to undo that change dynamically](https://gist.github.com/nathanchance/6a219cb868a3c6098debb82896d82612).

[map_range.i.txt](https://github.com/user-attachments/files/19983705/map_range.i.txt)
[map_range.unopt.ll.txt](https://github.com/user-attachments/files/19983706/map_range.unopt.ll.txt)
[good-map_range.opt.ll.txt](https://github.com/user-attachments/files/19983708/good-map_range.opt.ll.txt)
[bad-map_range.opt.ll.txt](https://github.com/user-attachments/files/19983709/bad-map_range.opt.ll.txt)

The diff of the optimized IR:

```diff
diff --git a/good-map_range.opt.ll.txt b/bad-map_range.opt.ll.txt
index 9eef147..e98844b 100644
--- a/good-map_range.opt.ll.txt
+++ b/bad-map_range.opt.ll.txt
@@ -141,23 +141,24 @@ entry:
   %add = add i64 %0, 4096
   store i64 %add, ptr %ptep, align 8
   %1 = load i8, ptr @arm64_use_ng_mappings, align 1, !range !11, !noundef !12
-  %loadedv = trunc nuw i8 %1 to i1
-  %or = select i1 %loadedv, i64 2051, i64 3
-  %2 = call i64 asm "mrs $0, tcr_el1", "=r"() #3, !srcloc !8
-  %and.i = and i64 %2, 576460752303423488
+  %2 = zext nneg i8 %1 to i64
+  %cond = shl nuw nsw i64 %2, 11
+  %3 = call i64 asm "mrs $0, tcr_el1", "=r"() #3, !srcloc !8
+  %and.i = and i64 %3, 576460752303423488
   %tobool.i.not = icmp eq i64 %and.i, 0
   %cond1 = select i1 %tobool.i.not, i64 768, i64 0
-  %or2 = or disjoint i64 %cond1, %or
-  %or6 = or disjoint i64 %or2, 29273397577909248
-  %or8 = or disjoint i64 %or2, 18014398509483136
+  %or = or disjoint i64 %cond1, %cond
+  %or6 = or disjoint i64 %or, 29273397577909251
+  %and = and i64 %or6, 18014398509485827
+  %or8 = or disjoint i64 %and, 128
   %not = xor i64 %clrmask, -1
   %and22 = and i64 %or8, %not
   %and25 = and i64 %or6, %not
   call void @map_range(ptr noundef nonnull %ptep, i64 noundef ptrtoint (ptr @_stext to i64), i64 noundef ptrtoint (ptr @__initdata_begin to i64), i64 noundef ptrtoint (ptr @_stext to i64), i64 %and22, i32 noundef 0, ptr noundef %pg_dir, i1 noundef false, i64 noundef 0) #4
   call void @map_range(ptr noundef nonnull %ptep, i64 noundef ptrtoint (ptr @__initdata_begin to i64), i64 noundef ptrtoint (ptr @_end to i64), i64 noundef ptrtoint (ptr @__initdata_begin to i64), i64 %and25, i32 noundef 0, ptr noundef %pg_dir, i1 noundef false, i64 noundef 0) #4
-  %3 = load i64, ptr %ptep, align 8
+  %4 = load i64, ptr %ptep, align 8
   call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ptep) #5
-  ret i64 %3
+  ret i64 %4
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)

```

The object file diff:

```diff
diff --git a/tmp/.psub.9lwVS5WRhB b/tmp/.psub.RgCvbpYhdV
index 7c4d9d9..c046c52 100644
--- a/tmp/.psub.9lwVS5WRhB
+++ b/tmp/.psub.RgCvbpYhdV
@@ -1,5 +1,5 @@
 
-build/linux/good/arch/arm64/kernel/pi/map_range.o:	file format elf64-littleaarch64
+build/linux/bad/arch/arm64/kernel/pi/map_range.o:	file format elf64-littleaarch64
 
 Disassembly of section .init.text:
 
@@ -128,52 +128,47 @@ create_init_idmap>:
  d10103ff     	sub	sp, sp, #0x40
 90000008     	adrp	x8, 0x0 <map_range>
 		00000000000001dc: R_AARCH64_ADR_PREL_PG_HI21	arm64_use_ng_mappings
- 52810069     	mov	w9, #0x803              // =2051
- d538204a     	mrs	x10, TCR_EL1
+ d5382049 	mrs	x9, TCR_EL1
+ f245013f     	tst	x9, #0x800000000000000
 39400108     	ldrb	w8, [x8]
 		00000000000001e8: R_AARCH64_LDST8_ABS_LO12_NC	arm64_use_ng_mappings
- d280900b 	mov	x11, #0x480             // =1152
+ 52806009     	mov	w9, #0x300 // =768
  a90257f6     	stp	x22, x21, [sp, #0x20]
- f2e0080b 	movk	x11, #0x40, lsl #48
+ 9a9f0129     	csel	x9, x9, xzr, eq
 a9034ff4     	stp	x20, x19, [sp, #0x30]
  aa0103f3     	mov	x19, x1
- 7100011f     	cmp	w8, #0x0
- 52800068     	mov	w8, #0x3 // =3
+ aa082d28     	orr	x8, x9, x8, lsl #11
+ d2808069     	mov	x9, #0x403              // =1027
  aa0003f4     	mov	x20, x0
- 9a881128 	csel	x8, x9, x8, ne
- f245015f     	tst	x10, #0x800000000000000
- 52806009     	mov	w9, #0x300              // =768
- 9a9f0129 	csel	x9, x9, xzr, eq
+ f2e00d09     	movk	x9, #0x68, lsl #48
 90000015     	adrp	x21, 0x0 <map_range>
-		000000000000021c: R_AARCH64_ADR_PREL_PG_HI21	__initdata_begin
+		0000000000000210: R_AARCH64_ADR_PREL_PG_HI21	__initdata_begin
  910002b5     	add	x21, x21, #0x0
-		0000000000000220:  R_AARCH64_ADD_ABS_LO12_NC	__initdata_begin
- aa080128     	orr	x8, x9, x8
- d2808009     	mov	x9, #0x400 // =1024
+		0000000000000214: R_AARCH64_ADD_ABS_LO12_NC	__initdata_begin
+ aa090116     	orr	x22, x8, x9
+ 91400408     	add	x8, x0, #0x1, lsl #12   // =0x1000
  90000001 	adrp	x1, 0x0 <map_range>
-		000000000000022c: R_AARCH64_ADR_PREL_PG_HI21	_stext
+		0000000000000220: R_AARCH64_ADR_PREL_PG_HI21	_stext
  91000021     	add	x1, x1, #0x0
-		0000000000000230:  R_AARCH64_ADD_ABS_LO12_NC	_stext
- f2e00d09 	movk	x9, #0x68, lsl #48
- 9140040a     	add	x10, x0, #0x1, lsl #12 // =0x1000
- aa090116     	orr	x22, x8, x9
- aa0b0108     	orr	x8, x8, x11
+		0000000000000224:  R_AARCH64_ADD_ABS_LO12_NC	_stext
+ 924af2c9 	and	x9, x22, #0xffc7ffffffffffff
+ a90023ff     	stp	xzr, x8, [sp]
+ b2790128     	orr	x8, x9, #0x80
  910023e0     	add	x0, sp, #0x8
- 8a330104     	bic	x4, x8, x19
  aa1503e2     	mov	x2, x21
+ 8a330104 	bic	x4, x8, x19
  aa0103e3     	mov	x3, x1
  2a1f03e5     	mov	w5, wzr
  aa1403e6     	mov	x6, x20
  2a1f03e7     	mov	w7, wzr
  a9017bfd 	stp	x29, x30, [sp, #0x10]
  910043fd     	add	x29, sp, #0x10
- a9002bff     	stp	xzr, x10, [sp]
- 94000000     	bl	0x26c <create_init_idmap+0x94>
-		000000000000026c: R_AARCH64_CALL26	map_range
+ 94000000     	bl	0x258 <create_init_idmap+0x80>
+		0000000000000258: R_AARCH64_CALL26	map_range
  90000002     	adrp	x2, 0x0 <map_range>
-		0000000000000270: R_AARCH64_ADR_PREL_PG_HI21	_end
+		000000000000025c: R_AARCH64_ADR_PREL_PG_HI21	_end
  91000042     	add	x2, x2, #0x0
-		0000000000000274: R_AARCH64_ADD_ABS_LO12_NC	_end
+		0000000000000260: R_AARCH64_ADD_ABS_LO12_NC	_end
  910023e0     	add	x0, sp, #0x8
 8a3302c4     	bic	x4, x22, x19
  aa1503e1     	mov	x1, x21
@@ -182,8 +177,8 @@ create_init_idmap>:
  aa1403e6     	mov	x6, x20
  2a1f03e7 	mov	w7, wzr
  f90003ff     	str	xzr, [sp]
- 94000000     	bl	0x298 <create_init_idmap+0xc0>
-		0000000000000298: R_AARCH64_CALL26	map_range
+ 94000000     	bl	0x284 <create_init_idmap+0xac>
+		0000000000000284: R_AARCH64_CALL26	map_range
  a9434ff4     	ldp	x20, x19, [sp, #0x30]
 f94007e0     	ldr	x0, [sp, #0x8]
  a94257f6     	ldp	x22, x21, [sp, #0x20]

```

If I replace `arch/arm64/kernel/pi/map_range.o` with the output of `llc --filetype obj` run on either the good optimized IR or the unoptimized IR, the kernel boots fine. As soon as I use the bad optimized IR instead, the kernel hangs. My initial suspicion is that `PAGE_KERNEL_ROX` is not getting optimized correctly, but my assembly knowledge is not very good, so I might be misunderstanding something.

If necessary, the `.o` can be generated from the `.i` with the following command:

```
clang --target=aarch64-linux-gnu -fintegrated-as -mlittle-endian -std=gnu11 -fshort-wchar -funsigned-char -fno-common -fno-PIE -fno-strict-aliasing -mgeneral-regs-only -Wno-psabi -fno-asynchronous-unwind-tables -fno-unwind-tables -mbranch-protection=pac-ret+bti -Wa,-march=armv8.5-a -fno-delete-null-pointer-checks -O2 -fstack-protector-strong -fno-omit-frame-pointer -fno-optimize-sibling-calls -ftrivial-auto-var-init=zero -fno-stack-clash-protection -falign-functions=4 -fstrict-flex-arrays=3 -fno-strict-overflow -fno-stack-check -fno-builtin-wcslen -Wall -Wundef -Werror=implicit-function-declaration -Werror=implicit-int -Werror=return-type -Werror=strict-prototypes -Wno-format-security -Wno-trigraphs -Wno-frame-address -Wno-address-of-packed-member -Wmissing-declarations -Wmissing-prototypes -Wframe-larger-than=2048 -Wno-gnu -Wno-format-overflow-non-kprintf -Wno-format-truncation-non-kprintf -Wvla -Wno-pointer-sign -Wcast-function-type -Wimplicit-fallthrough -Werror=date-time -Werror=incompatible-pointer-types -Wenum-conversion -Wextra -Wunused -Wno-unused-but-set-variable -Wno-unused-const-variable -Wno-format-overflow -Wno-override-init -Wno-pointer-to-enum-cast -Wno-tautological-constant-out-of-range-compare -Wno-unaligned-access -Wno-enum-compare-conditional -Wno-missing-field-initializers -Wno-type-limits -Wno-shift-negative-value -Wno-enum-enum-conversion -Wno-sign-compare -Wno-unused-parameter -mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=1240 -fpie -Os -mbranch-protection=none -fno-stack-protector -ffreestanding -fno-asynchronous-unwind-tables -fno-unwind-tables -fno-addrsig -mstrict-align -c map_range.i
```

Please feel free to give this issue a more descriptive title if necessary.

cc @andjo403 @nikic @dtcxzyw
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to