> Date: Wed, 10 Jun 2020 11:28:04 +0200
> From: Otto Moerbeek <[email protected]>
>
> On Tue, Jun 09, 2020 at 08:28:57PM +0200, Mark Kettenis wrote:
>
> > > Date: Tue, 9 Jun 2020 20:08:42 +0200
> > > From: Otto Moerbeek <[email protected]>
> > >
> > > On Tue, Jun 09, 2020 at 04:19:34PM +0200, Mark Kettenis wrote:
> > >
> > > > > Date: Tue, 9 Jun 2020 16:08:26 +0200
> > > > > From: Otto Moerbeek <[email protected]>
> > > > >
> > > > > On Tue, Jun 09, 2020 at 04:05:25PM +0200, Otto Moerbeek wrote:
> > > > >
> > > > > > On Tue, Jun 09, 2020 at 04:59:17PM +1000, Jonathan Gray wrote:
> > > > > >
> > > > > > > On Tue, Jun 09, 2020 at 08:12:17AM +0200, Otto Moerbeek wrote:
> > > > > > > > On Tue, Jun 09, 2020 at 08:01:12AM +0200, Otto Moerbeek wrote:
> > > > > > > >
> > > > > > > > > On Mon, Jun 08, 2020 at 09:46:23PM +0200, Mark Kettenis wrote:
> > > > > > > > >
> > > > > > > > > > > Date: Mon, 8 Jun 2020 20:27:22 +0200
> > > > > > > > > > > From: Otto Moerbeek <[email protected]>
> > > > > > > > > > >
> > > > > > > > > > > Hi.
> > > > > > > > > > >
> > > > > > > > > > > a page fault trap happens if I boot my Thnkpad X1 6th
> > > > > > > > > > > generation in the dock
> > > > > > > > > > > or put it in the dock afterwards. The dock has two DP
> > > > > > > > > > > monitors connected.
> > > > > > > > > > >
> > > > > > > > > > > If I change connector_bad_edid() to return immediately
> > > > > > > > > > > things seems to
> > > > > > > > > > > work ok.
> > > > > > > > > > >
> > > > > > > > > > > -Otto
> > > > > > > > > > >
> > > > > > > > > > > summary of trace:
> > > > > > > > > > >
> > > > > > > > > > > connector_bad_edid+0x4d
> > > > > > > > > > > drm_do_get_edid+0x382
> > > > > > > > > > > drm_get_edid+0x6b
> > > > > > > > > > > intel_hmi_set_edid+0xad
> > > > > > > > > > > intel_hdmi_detect+0xb1
> > > > > > > > > > > drm_helper_probe_detect+0x108
> > > > > > > > > > > intel_encoder_hotplug+0x7f
> > > > > > > > > > > intel_ddi_hotplug+0x54
> > > > > > > > > > > i915_hotplug_work_func+0x245
> > > > > > > > > > > tasq_thread+0x8d
> > > > > > > > > > >
> > > > > > > > > > > full trace:
> > > > > > > > > > >
> > > > > > > > > > > https://www.drijf.net/openbsd/IMG_20200608_154513.jpg
> > > > > > > > > >
> > > > > > > > > > Not sure what kernel you're using. But the instruction in
> > > > > > > > > > that image
> > > > > > > > > > doesn't eist in connector_bad_edid in the kernel I just
> > > > > > > > > > built.
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Strange, I do have it. /usr/src/sys/dev/pci/drm/drm_edid.c
> > > > > > > > > contains
> > > > > > > > > no version marker, but it's md5 is
> > > > > > > > > 3d889f9e1cb3c66cdb4eb49e9319d947
> > > > > > > > >
> > > > > > > > > objdump -d snippet:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > 0000000000003830 <connector_bad_edid>:
> > > > > > > > > 3830: 4c 8b 1d 00 00 00 00 mov 0(%rip),%r11
> > > > > > > > > #
> > > > > > > > > 3837 <connector_bad_edid+0x7>
> > > > > > > > > 3837: 4c 33 1c 24 xor (%rsp),%r11
> > > > > > > > > 383b: 55 push %rbp
> > > > > > > > > 383c: 48 89 e5 mov %rsp,%rbp
> > > > > > > > > 383f: 57 push %rdi
> > > > > > > > > 3840: 56 push %rsi
> > > > > > > > > 3841: 52 push %rdx
> > > > > > > > > 3842: 57 push %rdi
> > > > > > > > > 3843: 41 53 push %r11
> > > > > > > > > 3845: 41 57 push %r15
> > > > > > > > > 3847: 41 56 push %r14
> > > > > > > > > 3849: 41 55 push %r13
> > > > > > > > > 384b: 41 54 push %r12
> > > > > > > > > 384d: 53 push %rbx
> > > > > > > > > 384e: 48 83 ec 20 sub $0x20,%rsp
> > > > > > > > > 3852: 41 89 d7 mov %edx,%r15d
> > > > > > > > > 3855: 0f b6 46 7e movzbl 0x7e(%rsi),%eax
> > > > > > > > > 3859: 48 c1 e0 07 shl $0x7,%rax
> > > > > > > > > 385d: 48 89 75 a8 mov
> > > > > > > > > %rsi,0xffffffffffffffa8(%rbp)
> > > > > > > > > 3861: 48 01 f0 add %rsi,%rax
> > > > > > > > > 3864: 31 d2 xor %edx,%edx
> > > > > > > > > 3866: b9 03 00 00 00 mov $0x3,%ecx
> > > > > > > > > 386b: eb 10 jmp 387d
> > > > > > > > > <connector_bad_edid+0x4d>
> > > > > > > > > 386d: cc int3
> > > > > > > > > 386e: cc int3
> > > > > > > > > 386f: cc int3
> > > > > > > > > 3870: 0f b6 34 08 movzbl
> > > > > > > > > (%rax,%rcx,1),%esi
> > > > > > > > > 3874: 01 f2 add %esi,%edx
> > > > > > > > > 3876: 0f b6 d2 movzbl %dl,%edx
> > > > > > > > > 3879: 48 83 c1 04 add $0x4,%rcx
> > > > > > > > > 387d: 0f b6 74 08 fd movzbl
> > > > > > > > > 0xfffffffffffffffd(%rax,%rcx,1),%esi
> > > > > > > > > 3882: 01 d6 add %edx,%esi
> > > > > > > > > 3884: 0f b6 54 08 fe movzbl
> > > > > > > > > 0xfffffffffffffffe(%rax,%rcx,1),%edx
> > > > > > > > > 3889: 01 f2 add %esi,%edx
> > > > > > > > > 388b: 0f b6 f2 movzbl %dl,%esi
> > > > > > > > > 388e: 0f b6 54 08 ff movzbl
> > > > > > > > > 0xffffffffffffffff(%rax,%rcx,1),%edx
> > > > > > > > > 3893: 01 f2 add %esi,%edx
> > > > > > > > > 3895: 48 83 f9 7f cmp $0x7f,%rcx
> > > > > > > > > 3899: 75 d5 jne 3870
> > > > > > > > > <connector_bad_edid+0x40>
> > > > > > > > > ...
> > > > > > > > >
> > > > > > > >
> > > > > > > > This looks like an inlined unrolled version of
> > > > > > > > drm_edid_block_checksum()
> > > > > > >
> > > > > > > gdb points to drm_edid_block_checksum() as well
> > > > > > >
> > > > > > > 0xffffffff819966ed <+77>: movzbl -0x3(%rax,%rcx,1),%esi
> > > > > > >
> > > > > > > (gdb) info line *0xffffffff819966ed
> > > > > > > Line 1597 of "/sys/dev/pci/drm/drm_edid.c"
> > > > > > > starts at address 0xffffffff819966ed <connector_bad_edid+77>
> > > > > > > and ends at 0xffffffff819966fb <connector_bad_edid+91>.
> > > > > > >
> > > > > > > >
> > > > > > > > My guess is num_of_ext > num_blocks, will verify later when I'm
> > > > > > > > at the machine.
> > > > > > > >
> > > > > > > > -Otto
> > > > > > >
> > > > > >
> > > > > > Indeed. the diff below works for me,
> > > > > >
> > > > > > -Otto
> > > > > >
> > > > > > Index: drm_edid.c
> > > > > > ===================================================================
> > > > > > RCS file: /cvs/src/sys/dev/pci/drm/drm_edid.c,v
> > > > > > retrieving revision 1.27
> > > > > > diff -u -p -r1.27 drm_edid.c
> > > > > > --- drm_edid.c 8 Jun 2020 04:47:58 -0000 1.27
> > > > > > +++ drm_edid.c 9 Jun 2020 14:04:21 -0000
> > > > > > @@ -1807,6 +1807,9 @@ static void connector_bad_edid(struct dr
> > > > > > int i;
> > > > > > u8 num_of_ext = edid[0x7e];
> > > > > >
> > > > > > + if (num_of_ext > num_blocks)
> > > > > > + num_of_ext = num_blocks;
> > > > > > +
> > > > > > /* Calculate real checksum for the last edid extension block
> > > > > > data */
> > > > > > connector->real_edid_checksum =
> > > > > > drm_edid_block_checksum(edid + num_of_ext *
> > > > > > EDID_LENGTH);
> > > > > >
> > > > >
> > > > > Output: (first line is some debug code I added):
> > > > >
> > > > > num_of_ext = ff; num_blocks = 1
> > > > > drm:pid0:connector_bad_edid *WARNING* HDMI-A-1: EDID is invalid:
> > > > > [00] BAD 00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff
> > > > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > > >
> > > >
> > > > So now the question becomes why this happens. Is that really what the
> > > > monitor reports? Or is there some other bug that prevents us from
> > > > reading the right data?
> > > >
> > > > Would be grwat if you could find that out, but narrowing it down to
> > > > the code that actually gets used to read the EDID in this particular
> > > > case would already help.
> > > >
> > >
> > > I added some debug printfs:
> > >
> > > step 0: i = 4
> > > step 4: callling connector_bad_edid
> > > drm:pid0:connector_bad_edid *WARNING* HDMI-A-1: EDID is invalid:
> > > [00] BAD 00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff
> > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > > [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > >
> > > Does this give enough clues?
> >
> > Sorry, no.
> >
> > It seems you're coming through drm_get_edid() here, and what I'm after
> > is which specific instance of struct i2c_adapter is being used here.
> > And once that is known, how that specific i2c controller
> > implementation is failing or not.
> >
> > A first clue though is to check whether the i2c transfers in
> > drm_do_probe_ddc_edid() is failing or not.
ok, so this is using the implementation in display/intel_gmbus.c which
is a complicated one since it has code to fall back to "bit banging"
when the i2c master controller doesn't seem to work. You could dig
deeper into that and enable the debugging stuff in there. Does it
fall back onto the bitanging code?
You mentioned that you have two monitors connected to the dock. Do
any of them actually work?
If Jonathan agrees, I think you should commit the range check that you
added:
> + if (num_of_ext > num_blocks)
> + num_of_ext = num_blocks;
> +
Cheers,
Mark
> root on sd1a (9c6175b643dc33e7.a) swap on sd1b dump on sd1b
> GPE 102 ret 0 0
> drm_do_probe_ddc_edid/: 2 2
> drm_do_get_edid: call get_edid_block
> drm_do_probe_ddc_edid/: 2 2
> step 0: i = 0
> step 1: valid_extensions = 0
> drm_connector_attach_content_protection_property: stub
> drm_connector_attach_content_protection_property: stub
> drm_connector_attach_content_protection_property: stub
> drm_connector_attach_content_protection_property: stub
> i915_perf_register: stub
> debugfs_gt_register: stub
> drm_do_probe_ddc_edid/i915 gmbus dpb: -6 2
> drm_do_probe_ddc_edid/i915 gmbus dpb: 2 2
> drm_do_get_edid: call get_edid_block
> drm_do_probe_ddc_edid/i915 gmbus dpb: 2 2
> drm_do_get_edid: call get_edid_block
> drm_do_probe_ddc_edid/i915 gmbus dpb: 2 2
> drm_do_get_edid: call get_edid_block
> drm_do_probe_ddc_edid/i915 gmbus dpb: 2 2
> drm_do_get_edid: call get_edid_block
> drm_do_probe_ddc_edid/i915 gmbus dpb: 2 2
> step 0: i = 4
> step 4: callling connector_bad_edid
> drm:pid0:connector_bad_edid *WARNING* HDMI-A-1: EDID is invalid:
> [00] BAD 00 ff ff ff ff ff ff 00 ff ff ff ff ff ff ff ff
> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> [00] BAD ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> drm_do_probe_ddc_edid/i915 gmbus dpc: -35 2
> inteldrm0: 2560x1440, 32bpp
>
>
> Looks like the read succeeds but does not produce a valid block.
>
> -Otto
>
> Index: drm_edid.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_edid.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 drm_edid.c
> --- drm_edid.c 8 Jun 2020 04:47:58 -0000 1.27
> +++ drm_edid.c 10 Jun 2020 09:23:35 -0000
> @@ -1798,6 +1798,7 @@ drm_do_probe_ddc_edid(void *data, u8 *bu
> }
> } while (ret != xfers && --retries);
>
> + printf("%s/%s: %d %d\n", __func__, adapter->name, ret, xfers);
> return ret == xfers ? 0 : -1;
> }
>
> @@ -1807,6 +1808,9 @@ static void connector_bad_edid(struct dr
> int i;
> u8 num_of_ext = edid[0x7e];
>
> + if (num_of_ext > num_blocks)
> + num_of_ext = num_blocks;
> +
> /* Calculate real checksum for the last edid extension block data */
> connector->real_edid_checksum =
> drm_edid_block_checksum(edid + num_of_ext * EDID_LENGTH);
> @@ -1916,6 +1920,7 @@ struct edid *drm_do_get_edid(struct drm_
>
> /* base block fetch */
> for (i = 0; i < 4; i++) {
> + printf("%s: call get_edid_block\n", __func__);
> if (get_edid_block(data, edid, 0, EDID_LENGTH))
> goto out;
> if (drm_edid_block_valid(edid, 0, false,
> @@ -1926,11 +1931,13 @@ struct edid *drm_do_get_edid(struct drm_
> goto carp;
> }
> }
> + printf("step 0: i = %d\n", i);
> if (i == 4)
> goto carp;
>
> /* if there's no extensions, we're done */
> valid_extensions = edid[0x7e];
> + printf("step 1: valid_extensions = %x\n", valid_extensions);
> if (valid_extensions == 0)
> return (struct edid *)edid;
>
> @@ -1951,6 +1958,7 @@ struct edid *drm_do_get_edid(struct drm_
> u8 *block = edid + j * EDID_LENGTH;
>
> for (i = 0; i < 4; i++) {
> + printf("%s: call get_edid_block 2\n", __func__);
> if (get_edid_block(data, block, j, EDID_LENGTH))
> goto out;
> if (drm_edid_block_valid(block, j, false, NULL))
> @@ -1960,10 +1968,12 @@ struct edid *drm_do_get_edid(struct drm_
> if (i == 4)
> valid_extensions--;
> }
> + printf("step 2: valid_extensions = %x edid[0x7e] = %x\n",
> valid_extensions, edid[0x7e]);
>
> if (valid_extensions != edid[0x7e]) {
> u8 *base;
>
> + printf("step 3: calling connector_bad_edid\n");
> connector_bad_edid(connector, edid, edid[0x7e] + 1);
>
> edid[EDID_LENGTH-1] += edid[0x7e] - valid_extensions;
> @@ -1989,9 +1999,11 @@ struct edid *drm_do_get_edid(struct drm_
> edid = new;
> }
>
> + printf("return\n");
> return (struct edid *)edid;
>
> carp:
> + printf("step 4: callling connector_bad_edid\n");
> connector_bad_edid(connector, edid, 1);
> out:
> kfree(edid);
>
>