This became enough of a bother after crashing twice for me to read the
code. I think it's fairly clearly broken, somebody tell me if I misread
something. Grab a sys/dev/pci/drm/drm_edid.c for full context.
So, the crash seems to happen somewhere here, inside the inlined
drm_edid_block_checksum (its unrolled body wasn't fun to follow):
static int drm_edid_block_checksum(const u8 *raw_edid)
{
int i;
u8 csum = 0, crc = 0;
for (i = 0; i < EDID_LENGTH - 1; i++)
csum += raw_edid[i];
...
This above reads the memory in the range [raw_edid, raw_edid+0x7e). The
invocation of drm_edid_block_checksum is here:
static void connector_bad_edid(struct drm_connector *connector,
u8 *edid, int num_blocks)
{
int i;
u8 num_of_ext = edid[0x7e];
if (num_of_ext > num_blocks)
num_of_ext = num_blocks;
/* Calculate real checksum for the last edid extension block data */
connector->real_edid_checksum =
drm_edid_block_checksum(edid + num_of_ext * EDID_LENGTH);
Now, the stack trace below fairly clearly indicates that num_blocks is 1
(the value of %rax compared to the second parameter of
connector_bad_edid corroborates). My reading of drm_do_get_edid
disassembly indicates that connector_bad_edid we call is the one in the
error path after carp, so let read a bit of drm_do_get_edid:
struct edid *drm_do_get_edid(struct drm_connector *connector,
int (*get_edid_block)(void *data, u8 *buf, unsigned int block,
size_t len),
void *data)
if ((edid = kmalloc(EDID_LENGTH, GFP_KERNEL)) == NULL)
return NULL;
/* base block fetch */
for (i = 0; i < 4; i++) {
if (get_edid_block(data, edid, 0, EDID_LENGTH))
goto out;
if (drm_edid_block_valid(edid, 0, false,
&connector->edid_corrupt))
break;
if (i == 0 && drm_edid_is_zero(edid, EDID_LENGTH)) {
connector->null_edid_counter++;
goto carp;
}
}
if (i == 4)
goto carp;
...
carp:
connector_bad_edid(connector, edid, 1);
I see that we allocate edid of size EDID_LENGTH and then use
connector_bad_edid to read after the end of that block. The merciful
page fault is the best our kernel can do for this code, no?
I'm inclined to just do this:
- connector_bad_edid(connector, edid, 1);
+ connector_bad_edid(connector, edid, 0);
Am I wrong? Should there be a fix in the other call of connector_bad_edid?
Thanks
Greg
Greg Steuck <[email protected]> writes:
> I noticed we saw variations of this before. This hit my X1 9th gen when
> waking up with a 4k monitor attached via HDMI connector. FWIW, the
> screen is mirrored between the built-in panel and the big monitor.
>
> The below is largely OCRed and then hand-fixed from
> https://photos.app.goo.gl/qyXdd3e1Nsso921o9
>
> uvm_fault(0xffffffff82167400, 0xffff8000026c7000, 0, 1) -> e
> connector_bad_edid+0x50: movzbl 0xfffffffffffffffd(%rax, %rcx,1), %esi
> ddb{2}> bt
> connector_bad_edid(ffff800000cde000, ffff8000026c6f80,1) at
> connector_bad_edid+0x50
> drm_do_get_edid(ffff800000cde000, ffffffff81a00770, ffff8000002b20c8) at
> drm_do_get_edid+0x378
> drm_get_edid(ffff800000cde000, ffff8000002b20c0) at drm_get_edid+0x67?
> intel_hdmi_set_edid(ffff800000cde000) at intel_hdmi_set_edid+0x69
> intel_encoder_hotplug(ffff800000cd9000, ffff800000cd000,1) at
> intel_encoder_hotplug+0x7f
> intel_ddi_hotplug(ffff800000cd9000, ffff800000cde000.1) at
> intel_ddi_hotplug+0x54
> intel_hdmi_detect(ffff800000cd000,0) at intel_hdmi_detect+0xb1
> drm_helper_probe_detect(ffff800000cde000,0.0) at drm_helper_probe_detect+0xed
> i915_hotplug_work_func(ffff8000002b3168) at i915_hotplug_work_func+0x24f
> taskq_thread(ffff8000082aa380) at taskq_thread+0x81
> end trace frame: 0x0, count: -10
> ddb{2}> show registers
> rdi 0xffff800000cde000
> rsi 0xffff8000026c6f80
> rbp 0xffff800021c0c620
> rbx 0xffff8000026c6f80
> rdx 0
> rcx 0x3
> rax 0xffff8000026c7000
> r8 0x200
> r9 0xa
> r10 0xa146c6f403a26e87
> r11 0x7b0d0680ac61d824
> r12 0xffff8000002b20c0
> r13 0xffffffff81a00770
> r14 0xffff800000cde800
> r15 0x1
> rip 0xffffffff81a005b0 connector_bad_edid+0x50
> cs 0x8
> rflags 0x10246
> rsp 0xffff800021c0c5b0
> ss 0x10
> connector_bad_edid+0x50: movzbl 0xfffffffffffffffd(%rax, %rcx,1), %esi
> ddb{2}> show proc
> PROC (draw) pid=476108 stat=onproc
> flags process=14000<NOZOMBIE, SYSTEM> proc=200<SYSTEM>
> pri=0, usrpri=59, nice=20
> forw=0xffffffffffffffff, List-0xffff8000ffffc7e8,0xffff 8000ffffc018
> process=0xffff800021c063e8 user=0xffff800021c07000,
> vmspace=0xffffffff821b0388
> estcpu=9, cpticks=38, pctcpu=0.0
> user=0, sys=1, intr=0
> ddb{2}>
>
> OpenBSD 6.9-beta (GENERIC.MP) #419: Sat Mar 20 00:43:49 MDT 2021
> [email protected]:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 16903794688 (16120MB)
> avail mem = 16376107008 (15617MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 3.2 @ 0x766ac000 (72 entries)
> bios0: vendor LENOVO version "N2WET24W (1.14 )" date 10/15/2020
> bios0: LENOVO 20UAS1H100
> acpi0 at bios0: ACPI 6.1
> acpi0: sleep states S0 S3 S4 S5
> acpi0: tables DSDT FACP SSDT SSDT SSDT SSDT SSDT TPM2 SSDT HPET APIC MCFG
> ECDT SSDT SSDT SSDT NHLT BOOT SSDT LPIT WSMT SSDT DBGP DBG2 POAT BATB DMAR
> ASF! BGRT UEFI FPDT
> acpi0: wakeup devices GLAN(S4) XHC_(S3) XDCI(S4) HDAS(S4) RP01(S4) PXSX(S4)
> RP02(S4) PXSX(S4) PXSX(S4) RP04(S4) PXSX(S4) RP05(S4) PXSX(S4) RP06(S4)
> PXSX(S4) RP07(S4) [...]
> acpitimer0 at acpi0: 3579545 Hz, 24 bits
> acpihpet0 at acpi0: 23999999 Hz
> acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: Intel(R) Core(TM) i7-10610U CPU @ 1.80GHz, 7094.00 MHz, 06-8e-0c
> cpu0:
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,SRBDS_CTRL,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu0: 256KB 64b/line 8-way L2 cache
> cpu0: smt 0, core 0, package 0
> mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
> cpu0: apic clock running at 24MHz
> cpu0: mwait min=64, max=64, C-substates=0.2.1.2.4.1.1.1, IBE
> ...
> inteldrm0 at pci0 dev 2 function 0 "Intel UHD Graphics" rev 0x02
> drm0 at inteldrm0
> inteldrm0: msi, COFFEELAKE, gen 9
> ...
> inteldrm0: 3840x2160, 32bpp
> wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation), using wskbd0
> wsdisplay0: screen 1-5 added (std, vt100 emulation)
> ...
> drm:pid42157:connector_bad_edid *WARNING* HDMI-A-1: EDID is invalid:
> [00] BAD 00 ff ff ff ff ff ff 00 ff 00 04 04 72 72 10 04
> [00] BAD 04 d5 a1 a1 80 80 73 26 26 1b 01 01 03 80 80 47
> [00] BAD 47 28 78 78 2e 88 88 e5 e5 a8 55 55 4d a0 a0 25
> [00] BAD 0e 0e 50 50 54 bf bf ef 80 80 71 71 4f 81 81 40
> [00] BAD 81 81 80 81 81 c0 c0 95 00 00 b3 00 00 d1 d1 c0
> [00] BAD 01 01 01 4d 4d d0 d0 00 a0 a0 f0 70 70 3e 80 80
> [00] BAD 30 30 20 35 35 00 c4 c4 8f 8f 21 00 00 00 1a 1a
> [00] BAD 00 00 00 00 00 fd 00 00 17 4c 4c 1f 1f 87 3c 3c
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 4f
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 50
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 50
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 50
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 4f
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> ...
> uvm_fault(0xffffffff82167400, 0xffff8000026c7000, 0, 1) -> e
>
>
> The disassembly of the function is:
>
> 00000000000037c0 <connector_bad_edid>:
> 37c0: 4c 8b 1d 00 00 00 00 mov 0(%rip),%r11 # 37c7
> <connector_bad_edid+0x7>
> 37c7: 4c 33 1c 24 xor (%rsp),%r11
> 37cb: 55 push %rbp
> 37cc: 48 89 e5 mov %rsp,%rbp
> 37cf: 57 push %rdi
> 37d0: 56 push %rsi
> 37d1: 52 push %rdx
> 37d2: 57 push %rdi
> 37d3: 41 53 push %r11
> 37d5: 41 57 push %r15
> 37d7: 41 56 push %r14
> 37d9: 41 55 push %r13
> 37db: 41 54 push %r12
> 37dd: 53 push %rbx
> 37de: 48 83 ec 20 sub $0x20,%rsp
> 37e2: 41 89 d7 mov %edx,%r15d
> 37e5: 0f b6 46 7e movzbl 0x7e(%rsi),%eax
> 37e9: 39 d0 cmp %edx,%eax
> 37eb: 0f 4f c2 cmovg %edx,%eax
> 37ee: 0f b6 c0 movzbl %al,%eax
> 37f1: 48 c1 e0 07 shl $0x7,%rax
> 37f5: 48 89 75 a8 mov %rsi,0xffffffffffffffa8(%rbp)
> 37f9: 48 01 f0 add %rsi,%rax
> 37fc: 31 d2 xor %edx,%edx
> 37fe: b9 03 00 00 00 mov $0x3,%ecx
> 3803: eb 0b jmp 3810 <connector_bad_edid+0x50>
> 3805: cc int3
> 3806: cc int3
> 3807: cc int3
> 3808: cc int3
> 3809: cc int3
> 380a: cc int3
> 380b: cc int3
> 380c: cc int3
> 380d: cc int3
> 380e: cc int3
> 380f: cc int3
> 3810: 0f b6 74 08 fd movzbl
> 0xfffffffffffffffd(%rax,%rcx,1),%esi
> 3815: 01 d6 add %edx,%esi
> 3817: 0f b6 54 08 fe movzbl
> 0xfffffffffffffffe(%rax,%rcx,1),%edx
> 381c: 01 f2 add %esi,%edx
> 381e: 0f b6 f2 movzbl %dl,%esi
> 3821: 0f b6 54 08 ff movzbl
> 0xffffffffffffffff(%rax,%rcx,1),%edx
> 3826: 01 f2 add %esi,%edx
> 3828: 48 83 f9 7f cmp $0x7f,%rcx
> 382c: 74 0f je 383d <connector_bad_edid+0x7d>
> 382e: 0f b6 34 08 movzbl (%rax,%rcx,1),%esi
> 3832: 01 f2 add %esi,%edx
> 3834: 0f b6 d2 movzbl %dl,%edx
> 3837: 48 83 c1 04 add $0x4,%rcx
> 383b: eb d3 jmp 3810 <connector_bad_edid+0x50>
> 383d: f6 da neg %dl
> 383f: 88 97 19 04 00 00 mov %dl,0x419(%rdi)
> 3845: 8b 87 14 04 00 00 mov 0x414(%rdi),%eax
> 384b: 8d 48 01 lea 0x1(%rax),%ecx
> 384e: 89 8f 14 04 00 00 mov %ecx,0x414(%rdi)
> 3854: 85 c0 test %eax,%eax
> 3856: 74 0d je 3865 <connector_bad_edid+0xa5>
> };
>
> static inline bool drm_debug_enabled(enum drm_debug_category category)
> {
> return unlikely(__drm_debug & category);
> 3858: f6 05 00 00 00 00 04 testb $0x4,0(%rip) # 385f
> <connector_bad_edid+0x9f>