Quoting Imre Deak (2025-11-12 14:53:47-03:00)
>On Fri, Nov 07, 2025 at 09:05:40PM -0300, Gustavo Sousa wrote:
>> Xe3p_LPD has a new feature that allows the driver to allocate at runtime
>> the DDI (TC ones) port to drive a legacy connection on the Type-C
>> subsystem.  This allows better resource utilization, because now there
>> is no need to statically reserve ports for legacy connectors on the
>> Type-C subsystem.
>> 
>> That said, our driver is not yet ready for the dynamic allocation.
>> Thus, as an incremental step, let's add the logic containing the
>> required programming sequence for the allocation, but, instead of
>> selecting the first available port, we try so use the 1:1 mapping
>> expected by the driver today.
>> 
>> Bspec: 68954
>> Co-developed-by: Dnyaneshwar Bhadane <[email protected]>
>> Signed-off-by: Dnyaneshwar Bhadane <[email protected]>
>> Signed-off-by: Gustavo Sousa <[email protected]>
>> ---
>> 
>> NOTE: This patch is still a WIP. There are some opens to resolve here.
>> Nevertheless, I'm sending it here for early feedback.
>> 
>> For the HIP-index stuff, I have a local refactor started and need to
>> finish it up and send it.
>> 
>> The other open is about concurrent calls to iom_dp_resource_lock().  It
>> is likely that we need to have a software lock to prevent concurrent
>> access to IOM_DP_HW_RESOURCE_SEMAPHORE from our driver.
>> ---
>>  drivers/gpu/drm/i915/display/intel_display_regs.h |  20 ++-
>>  drivers/gpu/drm/i915/display/intel_tc.c           | 151 
>> +++++++++++++++++++++-
>>  2 files changed, 169 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/display/intel_display_regs.h 
>> b/drivers/gpu/drm/i915/display/intel_display_regs.h
>> index 89ea0156ee06..0cf7d43ce210 100644
>> --- a/drivers/gpu/drm/i915/display/intel_display_regs.h
>> +++ b/drivers/gpu/drm/i915/display/intel_display_regs.h
>> @@ -2908,6 +2908,25 @@ enum skl_power_gate {
>>  #define   DP_PIN_ASSIGNMENT(idx, x)                ((x) << ((idx) * 4))
>>  /* See enum intel_tc_pin_assignment for the pin assignment field values. */
>>  
>> +/*
>> + * FIXME: There is also a definition for this register in 
>> intel_dkl_phy_regs.h.
>> + * We need to consolidate the definitions.
>> + */
>> +#define HIP_INDEX_REG0                                _MMIO(0x1010a0)
>> +#define   HIP_168_INDEX_MASK                        REG_GENMASK(3, 0)
>> +#define   HIP_168_IOM_RES_MGMT                        
>> REG_FIELD_PREP(HIP_168_INDEX_MASK, 0x1)
>> +
>> +#define IOM_DP_HW_RESOURCE_SEMAPHORE                _MMIO(0x168038)
>> +#define   IOM_DP_HW_SEMLOCK                        REG_BIT(31)
>> +#define   IOM_REQUESTOR_ID_MASK                        REG_GENMASK(3, 0)
>> +#define   IOM_REQUESTOR_ID_DISPLAY_ENGINE        
>> REG_FIELD_PREP(IOM_REQUESTOR_ID_MASK, 0x4)
>> +
>> +#define IOM_DP_RESOURCE_MNG                        _MMIO(0x16802c)
>> +#define   IOM_DDI_CONSUMER_SHIFT(tc_port)        ((tc_port) * 4)
>> +#define   IOM_DDI_CONSUMER_MASK(tc_port)        (0xf << 
>> IOM_DDI_CONSUMER_SHIFT(tc_port))
>> +#define   IOM_DDI_CONSUMER(tc_port, x)                ((x) << 
>> IOM_DDI_CONSUMER_SHIFT(tc_port))
>> +#define   IOM_DDI_CONSUMER_STATIC_TC(tc_port)        
>> IOM_DDI_CONSUMER(tc_port, 0x8 + (tc_port))
>
>It would be simpler to define the above without the shift, i.e. as 8 +
>tc_port.

You mean something like this?

#define   IOM_DDI_CONSUMER_STATIC_TC(tc_port)        (0x8 + (tc_port))

?

Yeah... Looking at the code, we wouldn't need to shift back when
defining expected_consumer.

>
>> +
>>  #define _TCSS_DDI_STATUS_1                        0x161500
>>  #define _TCSS_DDI_STATUS_2                        0x161504
>>  #define TCSS_DDI_STATUS(tc)                        _MMIO(_PICK_EVEN(tc, \
>> @@ -2946,5 +2965,4 @@ enum skl_power_gate {
>>  #define   MTL_TRDPRE_MASK                REG_GENMASK(7, 0)
>>  
>>  
>> -
>>  #endif /* __INTEL_DISPLAY_REGS_H__ */
>> diff --git a/drivers/gpu/drm/i915/display/intel_tc.c 
>> b/drivers/gpu/drm/i915/display/intel_tc.c
>> index 7e17ca018748..3c333999bbe4 100644
>> --- a/drivers/gpu/drm/i915/display/intel_tc.c
>> +++ b/drivers/gpu/drm/i915/display/intel_tc.c
>> @@ -9,6 +9,7 @@
>>  
>>  #include "i915_reg.h"
>>  #include "intel_atomic.h"
>> +#include "intel_bios.h"
>>  #include "intel_cx0_phy_regs.h"
>>  #include "intel_ddi.h"
>>  #include "intel_de.h"
>> @@ -25,6 +26,9 @@
>>  #include "intel_modeset_lock.h"
>>  #include "intel_tc.h"
>>  
>> +#define IOM_DP_RES_SEMAPHORE_LOCK_TIMEOUT_US        10
>> +#define IOM_DP_RES_SEMAPHORE_RETRY_TIMEOUT_US        10000
>
>The above param names should make it clear how poll_timeout_us() uses
>them (i.e. stg like sleep vs. timeout instead of lock_timeout vs.
>retry_timeout), but probably even clearer to just drop the defines and
>inline the values in the call.

Ack.  I going with the latter.

>
>> +
>>  enum tc_port_mode {
>>          TC_PORT_DISCONNECTED,
>>          TC_PORT_TBT_ALT,
>> @@ -1200,6 +1204,143 @@ static void xelpdp_tc_phy_get_hw_state(struct 
>> intel_tc_port *tc)
>>          __tc_cold_unblock(tc, domain, tc_cold_wref);
>>  }
>>  
>> +static void iom_res_mgmt_prepare_reg_access(struct intel_display *display)
>> +{
>> +        /*
>> +         * IOM resource management registers live in the 2nd 4KB page of IOM
>> +         * address space. So we need to configure HIP_INDEX_REG0 with the
>> +         * correct index.
>> +         *
>> +         * FIXME: We need to have this and dekel PHY implementation using a
>> +         * common abstraction to access registers on the HIP-indexed 
>> ranges, and
>> +         * this function would then be dropped.
>> +         */
>> +        intel_de_rmw(display, HIP_INDEX_REG0,
>> +                     HIP_168_INDEX_MASK, HIP_168_IOM_RES_MGMT);
>
>This matches what intel_dkl_phy.c does, that one also taking the
>required lock around the access. At one point the intel_dkl_phy
>file/interface could be renamed to intel_tc_reg or similar accordingly.

I have already started a local series that introduces the "HIP-index
based registers" abstraction.  I need to go back to finish it up.

The basic idea is that both intel_dkl_phy.c and this IOM stuff would use
functions specific to accessing registers behind the HIP-based ranges.

To give an idea, here is a copy/paste of the kerneldoc I currently have
in that WIP series:

  | diff --git a/drivers/gpu/drm/i915/display/intel_hip_idx.c 
b/drivers/gpu/drm/i915/display/intel_hip_idx.c
  | new file mode 100644
  | index 000000000000..ff2492b8275d
  | --- /dev/null
  | +++ b/drivers/gpu/drm/i915/display/intel_hip_idx.c
  | @@ -0,0 +1,110 @@
  | +// SPDX-License-Identifier: MIT
  | +/*
  | + * Copyright (C) 2025 Intel Corporation
  | + */
  | +
  | +/**
  | + * DOC: Display HIP-indexed register access
  | + *
  | + * Display MMIO mapping for offsets in [0x168000,0x16ffff] are governed by
  | + * configurations in the HIP_INDEX registers provided by the hardware.
  | + *
  | + * Usually each of the valid 4KB range in that space will be mapped to 
some IP
  | + * block, providing access to registers of that IP. The HIP_INDEX registers
  | + * expose an 8-bit index value for each 4KB range. Those indices provide a 
way
  | + * to access data that is beyond the initial 4KB block provided by the 
target
  | + * IP.
  | + *
  | + * As an example, say that the range [0x16a000,0x16afff] is mapped to some
  | + * sub-IP that contains a 8KB register file. To access the initial 4KB of 
that
  | + * register file, we would need to set the index respective to
  | + * [0x16a000,0x16afff] in HIP_INDEX to 0; to access data in the second 4KB
  | + * window, we would need to set the index to 1.
  | + *
  | + * For simple read, write or rmw operations on a single register, the
  | + * functions intel_hip_idx_reg_read(), intel_hip_idx_reg_write() and
  | + * intel_hip_idx_reg_rmw() can be used, which will call 
intel_hip_idx_lock()
  | + * and intel_hip_idx_unlock() internally.
  | + *
  | + * For other scenarios, then it is necessary to wrap the register accesses
  | + * with explicit calls to intel_hip_idx_lock() and intel_hip_idx_unlock(), 
and
  | + * use the MMIO functions provided by intel_de.h. For the latter, the 
function
  | + * intel_hip_idx_reg_to_i915_reg() needs to be used in order to provide the
  | + * correct reg argument to those functions.
  | + */
  | (...)

>
>> +}
>> +
>> +/*
>> + * FIXME: This function also needs to avoid concurrent accesses from the 
>> driver
>> + * itself, possibly via a software lock.
>> + */
>> +static int iom_dp_resource_lock(struct intel_tc_port *tc)
>> +{
>> +        struct intel_display *display = to_intel_display(tc->dig_port);
>> +        u32 val = IOM_DP_HW_SEMLOCK | IOM_REQUESTOR_ID_DISPLAY_ENGINE;
>> +        int ret;
>> +
>> +        iom_res_mgmt_prepare_reg_access(display);
>> +        ret = poll_timeout_us(intel_de_write(display, 
>> IOM_DP_HW_RESOURCE_SEMAPHORE, val),
>> +                              (intel_de_read(display, 
>> IOM_DP_HW_RESOURCE_SEMAPHORE) & val) == val,
>
>This happens to work, but it's more future proof/bspec conformant to
>properly mask the requestor ID field when reading it back.

Agreed.

>
>> +                              IOM_DP_RES_SEMAPHORE_LOCK_TIMEOUT_US,
>> +                              IOM_DP_RES_SEMAPHORE_RETRY_TIMEOUT_US, false);
>> +
>> +        if (ret)
>> +                drm_err(display->drm, "Port %s: timeout trying to lock IOM 
>> semaphore\n",
>> +                        tc->port_name);
>> +
>> +        return ret;
>> +}
>> +
>> +static void iom_dp_resource_unlock(struct intel_tc_port *tc)
>> +{
>> +        struct intel_display *display = to_intel_display(tc->dig_port);
>> +
>> +        iom_res_mgmt_prepare_reg_access(display);
>> +        intel_de_write(display, IOM_DP_HW_RESOURCE_SEMAPHORE, 
>> IOM_REQUESTOR_ID_DISPLAY_ENGINE);
>> +}
>> +
>> +static bool xe3p_tc_iom_allocate_ddi(struct intel_tc_port *tc, bool 
>> allocate)
>> +{
>> +        struct intel_display *display = to_intel_display(tc->dig_port);
>> +        struct intel_digital_port *dig_port = tc->dig_port;
>> +        enum tc_port tc_port = intel_encoder_to_tc(&dig_port->base);
>> +        u32 val;
>> +        u32 consumer;
>> +        u32 expected_consumer;
>> +        bool ret;
>> +
>> +        if (DISPLAY_VER(display) < 35)
>> +                return true;
>> +
>> +        if (tc->mode != TC_PORT_LEGACY)
>> +                return true;
>> +
>> +        if 
>> (!intel_bios_encoder_supports_dyn_port_over_tc(dig_port->base.devdata))
>
>dedicated_external is stored separately in dig_port, why the "related"
>dyn_port_over_tc flag isn't?

The only reason dedicated_external is stored is because VBT data is
already freed by the time intel_encoder_is_tc() is called in the driver
unbind path.  In the future we should fix the VBT lifetime issue in
order to be able to drop the dedicated_external member of dig_port.

>
>> +                return true;
>> +
>> +        if (iom_dp_resource_lock(tc))
>> +                return false;
>> +
>> +        val = intel_de_read(display, IOM_DP_RESOURCE_MNG);
>> +
>> +        consumer = val & IOM_DDI_CONSUMER_MASK(tc_port);
>> +        consumer >>= IOM_DDI_CONSUMER_SHIFT(tc_port);
>> +
>> +        /*
>> +         * Bspec instructs to select first available DDI, but our driver is 
>> not
>> +         * ready for such dynamic allocation yet. For now, we force a 
>> "static"
>> +         * allocation: map the physical port (where HPD happens) to the
>> +         * encoder's DDI (logical TC port, represented by tc_port).
>> +         */
>> +        expected_consumer = IOM_DDI_CONSUMER_STATIC_TC(tc_port);
>> +        expected_consumer >>= IOM_DDI_CONSUMER_SHIFT(tc_port);
>> +
>> +        if (allocate) {
>> +                struct intel_encoder *other_encoder;
>> +
>> +                /*
>> +                 * Check if this encoder's DDI is already allocated for 
>> another
>> +                 * physical port, which could have happened prior to the 
>> driver
>> +                 * taking over (e.g. GOP).
>> +                 */
>> +                for_each_intel_encoder(display->drm, other_encoder) {
>> +                        enum tc_port other_tc_port = 
>> intel_encoder_to_tc(other_encoder);
>> +                        u32 other_consumer;
>> +
>> +                        if (tc_port == TC_PORT_NONE || other_tc_port == 
>> tc_port)
>> +                                continue;
>> +
>> +                        other_consumer = val & 
>> IOM_DDI_CONSUMER_MASK(other_tc_port);
>> +                        other_consumer >>= 
>> IOM_DDI_CONSUMER_SHIFT(other_tc_port);
>> +                        if (other_consumer == expected_consumer) {
>> +                                drm_err(display->drm, "Port %s: expected 
>> consumer %u already allocated another DDI; IOM_DP_RESOURCE_MNG=0x%08x\n",
>> +                                        tc->port_name, expected_consumer, 
>> val);
>> +                                ret = false;
>> +                                goto out_resource_unlock;
>> +                        }
>> +                }
>> +
>> +                if (consumer == 0) {
>> +                        /* DDI is free to use, let's allocate it. */
>> +                        val &= ~IOM_DDI_CONSUMER_MASK(tc_port);
>> +                        val |= IOM_DDI_CONSUMER(tc_port, expected_consumer);
>> +                        intel_de_write(display, IOM_DP_RESOURCE_MNG, val);
>> +                        ret = true;
>> +                } else if (consumer == expected_consumer) {
>> +                        /*
>> +                         * Nothing to do, as the expected "static" DDI 
>> allocation is
>> +                         * already in place.
>> +                         */
>> +                        ret = true;
>> +                } else {
>> +                        drm_err(display->drm, "Port %s: DDI already 
>> allocated for consumer %u; IOM_DP_RESOURCE_MNG=0x%08x\n",
>> +                                tc->port_name, consumer, val);
>> +                        ret = false;
>> +                }
>> +        } else {
>> +                drm_WARN_ON(display->drm, consumer != expected_consumer);
>> +                val &= ~IOM_DDI_CONSUMER_MASK(tc_port);
>> +                intel_de_write(display, IOM_DP_RESOURCE_MNG, val);
>> +                ret = true;
>> +        }
>> +
>> +out_resource_unlock:
>> +        iom_dp_resource_unlock(tc);
>> +
>> +        return ret;
>> +}
>> +
>>  static bool xelpdp_tc_phy_connect(struct intel_tc_port *tc, int 
>> required_lanes)
>>  {
>>          tc->lock_wakeref = tc_cold_block(tc);
>> @@ -1210,9 +1351,12 @@ static bool xelpdp_tc_phy_connect(struct 
>> intel_tc_port *tc, int required_lanes)
>>                  return true;
>>          }
>>  
>> -        if (!xelpdp_tc_phy_enable_tcss_power(tc, true))
>> +        if (!xe3p_tc_iom_allocate_ddi(tc, true))
>
>This doesn't work. A connector registered to userspace must be always
>functional (except for MST connectors which are dynamically registered).
>So the DDI allocation and with that connecting the PHY cannot fail here
>for a legacy connector/PHY. Instead of this the allocation could be
>moved to happen already in intel_tc_phy_ops::init for now, allocating a
>DDI for a legacy PHY (intel_tc_port::legacy_port == true) and if that
>fails also fail the corresponding connector/encoder registration in
>intel_ddi_init(). The DDI would need to be released by
>intel_tc_port_cleanup().

Ah, I see.  Well, I guess for legacy connections, doing your suggestion
seems fine.

Is there any power management aspect that we should be aware here? Like,
some power well being disabled and causing the allocation to be "lost"
somehow.  Since this thing is in the TCSS, I think no display power
wells could cause that, right?

By the way, I wonder how we would handle things in the future when/if we
enable the dynamic DDI allocation thing.

--
Gustavo Sousa

>
>>                  goto out_unblock_tccold;
>>  
>> +        if (!xelpdp_tc_phy_enable_tcss_power(tc, true))
>> +                goto out_deallocate_ddi;
>> +
>>          xelpdp_tc_phy_take_ownership(tc, true);
>>  
>>          read_pin_configuration(tc);
>> @@ -1226,6 +1370,9 @@ static bool xelpdp_tc_phy_connect(struct intel_tc_port 
>> *tc, int required_lanes)
>>          xelpdp_tc_phy_take_ownership(tc, false);
>>          xelpdp_tc_phy_wait_for_tcss_power(tc, false);
>>  
>> +out_deallocate_ddi:
>> +        xe3p_tc_iom_allocate_ddi(tc, false);
>> +
>>  out_unblock_tccold:
>>          tc_cold_unblock(tc, fetch_and_zero(&tc->lock_wakeref));
>>  
>> @@ -1236,6 +1383,8 @@ static void xelpdp_tc_phy_disconnect(struct 
>> intel_tc_port *tc)
>>  {
>>          switch (tc->mode) {
>>          case TC_PORT_LEGACY:
>> +                xe3p_tc_iom_allocate_ddi(tc, false);
>> +                fallthrough;
>>          case TC_PORT_DP_ALT:
>>                  xelpdp_tc_phy_take_ownership(tc, false);
>>                  xelpdp_tc_phy_enable_tcss_power(tc, false);
>> 
>> -- 
>> 2.51.0
>>

Reply via email to