date:20221017

[PATCH] drm/edid: Parse VRR cap fields from HFVSDB block

2022-10-17 Thread Ankit Nautiyal

This patch parses HFVSDB fields for VRR capabilities of an
HDMI2.1 sink and stores the VRR caps in a new structure in
drm_hdmi_info.

Signed-off-by: Ankit Nautiyal 
---
 drivers/gpu/drm/drm_edid.c  | 26 --
 include/drm/drm_connector.h | 27 +++
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 47465b9765f1..bb1f7d899580 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5823,6 +5823,21 @@ static void drm_parse_ycbcr420_deep_color_info(struct 
drm_connector *connector,
hdmi->y420_dc_modes = dc_mask;
 }
 
+static void drm_parse_vrr_info(struct drm_hdmi_vrr_cap *hdmi_vrr,
+  const u8 *hf_scds)
+{
+   if (hf_scds[8] & DRM_EDID_CNMVRR)
+   hdmi_vrr->cnm_vrr = true;
+   if (hf_scds[8] & DRM_EDID_CINEMA_VRR)
+   hdmi_vrr->cinema_vrr = true;
+   if (hf_scds[8] & DRM_EDID_MDELTA)
+   hdmi_vrr->m_delta = true;
+
+   hdmi_vrr->vrr_min = hf_scds[9] & DRM_EDID_VRR_MIN_MASK;
+   hdmi_vrr->vrr_max = (hf_scds[9] & DRM_EDID_VRR_MAX_UPPER_MASK) << 2;
+   hdmi_vrr->vrr_max |= hf_scds[10] & DRM_EDID_VRR_MAX_LOWER_MASK;
+}
+
 static void drm_parse_dsc_info(struct drm_hdmi_dsc_cap *hdmi_dsc,
   const u8 *hf_scds)
 {
@@ -5901,9 +5916,11 @@ static void drm_parse_hdmi_forum_scds(struct 
drm_connector *connector,
struct drm_display_info *display = >display_info;
struct drm_hdmi_info *hdmi = >hdmi;
struct drm_hdmi_dsc_cap *hdmi_dsc = >dsc_cap;
+   struct drm_hdmi_vrr_cap *hdmi_vrr = >vrr_cap;
int max_tmds_clock = 0;
u8 max_frl_rate = 0;
bool dsc_support = false;
+   bool vrr_support = false;
 
display->has_hdmi_infoframe = true;
 
@@ -5949,14 +5966,19 @@ static void drm_parse_hdmi_forum_scds(struct 
drm_connector *connector,
 
drm_parse_ycbcr420_deep_color_info(connector, hf_scds);
 
+   if (cea_db_payload_len(hf_scds) >= 8 && hf_scds[8]) {
+   drm_parse_vrr_info(hdmi_vrr, hf_scds);
+   vrr_support = true;
+   }
+
if (cea_db_payload_len(hf_scds) >= 11 && hf_scds[11]) {
drm_parse_dsc_info(hdmi_dsc, hf_scds);
dsc_support = true;
}
 
drm_dbg_kms(connector->dev,
-   "HF-VSDB: max TMDS clock: %d KHz, HDMI 2.1 support: %s, DSC 
1.2 support: %s\n",
-   max_tmds_clock, str_yes_no(max_frl_rate), 
str_yes_no(dsc_support));
+   "HF-VSDB: max TMDS clock: %d KHz, HDMI 2.1 support: %s, VRR 
support: %s, DSC 1.2 support: %s\n",
+   max_tmds_clock, str_yes_no(max_frl_rate), 
str_yes_no(vrr_support), str_yes_no(dsc_support));
 }
 
 static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector,
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index b1b2df48d42c..ec6ef71ab5cd 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -219,6 +219,30 @@ struct drm_hdmi_dsc_cap {
u8 total_chunk_kbytes;
 };
 
+
+/**
+ * struct drm_hdmi_vrr_cap - VRR capabilities of HDMI sink
+ * Describes the VRR support provided by HDMI 2.1 sink.
+ * The information is fetched from additional HFVSDB blocks defined
+ * for HDMI 2.1.
+ */
+struct drm_hdmi_vrr_cap {
+   /** @cnm_vrr: sink supports negative Mvrr values*/
+   bool cnm_vrr;
+
+   /** @cinema_vrr: sink supports fractional and integer media rates < 
VRRmin*/
+   bool cinema_vrr;
+
+   /** @m_delta: sink can anticipate and compensate for frame-to-frame 
variation in Mvrr */
+   bool m_delta;
+
+   /** @vrr_min: VRRmin - lowest framerate in Hz that sink can support in 
VRR */
+   u8 vrr_min;
+
+   /** @vrr_max: VRRmax - highest framerate in Hz that sink can support in 
VRR */
+   u16 vrr_max;
+};
+
 /**
  * struct drm_hdmi_info - runtime information about the connected HDMI sink
  *
@@ -259,6 +283,9 @@ struct drm_hdmi_info {
 
/** @dsc_cap: DSC capabilities of the sink */
struct drm_hdmi_dsc_cap dsc_cap;
+
+   /** @vrr_cap: VRR capabilities of the sink */
+   struct drm_hdmi_vrr_cap vrr_cap;
 };
 
 /**
-- 
2.25.1

Re: [v1] arm64: dts: qcom: sc7280: assign DSI clock source parents

2022-10-17 Thread Bjorn Andersson

On Wed, 7 Sep 2022 17:05:53 +0530, Rajeev Nandan wrote:
> Assign DSI clock source parents to DSI PHY clocks.
> 
> 

Applied, thanks!

[1/1] arm64: dts: qcom: sc7280: assign DSI clock source parents
  commit: 80edac18ac173f0f0130c2164f75ddadcd68fa7f

Best regards,
-- 
Bjorn Andersson

Re: (subset) [PATCH 0/9] arm: dts: qcom: rename DSI PHY nodes

2022-10-17 Thread Bjorn Andersson

On Sat, 24 Sep 2022 12:00:59 +0300, Dmitry Baryshkov wrote:
> Historically DSI PHY device tree nodes used the dsi-phy@ names. Replace
> them with generic phy@ names.
> 
> Dmitry Baryshkov (9):
>   ARM: dts: qcom-apq8064: change DSI PHY node name to generic one
>   ARM: dts: qcom-msm8974: change DSI PHY node name to generic one
>   arm64: dts: qcom: msm8916: change DSI PHY node name to generic one
>   arm64: dts: qcom: msm8996: change DSI PHY node name to generic one
>   arm64: dts: qcom: sc7180: change DSI PHY node name to generic one
>   arm64: dts: qcom: sdm630: change DSI PHY node name to generic one
>   arm64: dts: qcom: sdm660: change DSI PHY node name to generic one
>   arm64: dts: qcom: sdm845: change DSI PHY node name to generic one
>   arm64: dts: qcom: sm8250: change DSI PHY node name to generic one
> 
> [...]

Applied, thanks!

[1/9] ARM: dts: qcom-apq8064: change DSI PHY node name to generic one
  commit: cf6cea98662dc9ecf7707076b10499785870ff23
[2/9] ARM: dts: qcom-msm8974: change DSI PHY node name to generic one
  commit: 798e65cdd910a59a34de365ff9e00c186fb568b4

Best regards,
-- 
Bjorn Andersson

Re: [PATCH v7 01/10] drm: bridge: Add Samsung DSIM bridge driver

2022-10-17 Thread Jagan Teki

On Mon, Oct 17, 2022 at 2:31 PM Marek Szyprowski
 wrote:
>
> Hi,
>
> On 17.10.2022 10:48, Marek Vasut wrote:
> > On 10/17/22 09:43, Jagan Teki wrote:
> >> On Mon, Oct 17, 2022 at 12:49 PM Marek Vasut  wrote:
> >>> On 10/17/22 04:49, Jagan Teki wrote:
>  On Sun, Oct 16, 2022 at 3:16 AM Marek Vasut  wrote:
> >
> > On 10/5/22 17:13, Jagan Teki wrote:
> >> Samsung MIPI DSIM controller is common DSI IP that can be used in
> >> various
> >> SoCs like Exynos, i.MX8M Mini/Nano.
> >>
> >> In order to access this DSI controller between various platform
> >> SoCs,
> >> the ideal way to incorporate this in the drm stack is via the drm
> >> bridge
> >> driver.
> >>
> >> This patch is trying to differentiate platform-specific and
> >> bridge driver
> >> code by maintaining exynos platform glue code in exynos_drm_dsi.c
> >> driver
> >> and common bridge driver code in samsung-dsim.c providing that
> >> the new
> >> platform-specific glue should be supported in the bridge driver,
> >> unlike
> >> exynos platform drm drivers.
> >>
> >> - Add samsung_dsim_plat_data for keeping platform-specific
> >> attributes like
> >>  host_ops, irq_ops, and hw_type.
> >>
> >> - Initialize the plat_data hooks for exynos platform in
> >> exynos_drm_dsi.c.
> >>
> >> - samsung_dsim_probe is the common probe call across
> >> exynos_drm_dsi.c and
> >>  samsung-dsim.c.
> >>
> >> - plat_data hooks like host_ops and irq_ops are invoked during the
> >>  respective bridge call chains.
> >
> > Maybe the Subject should say "Split ... driver" or "Move ...
> > driver" ,
> > since it is not adding a new driver here ?
> 
>  Though it is not adding a completely new driver, it is adding more
>  infrastructure platform code to be compatible with both Exynos and
>  i.MX8M. This is the prime reason for adding that commit head and
>  explaining the same in the commit body.
> >>>
> >>> Diffstat looks like this:
> >>>
> >>>drivers/gpu/drm/bridge/samsung-dsim.c   | 1703
> >>> ++
> >>>drivers/gpu/drm/exynos/Kconfig  |1 +
> >>>drivers/gpu/drm/exynos/exynos_drm_dsi.c | 1766
> >>> ++-
> >>>include/drm/bridge/samsung-dsim.h   |  113 ++
> >>>7 files changed, 1952 insertions(+), 1653 deletions(-)
> >>>
> >>> Looks to me like most of the code is just moved from existing driver in
> >>> this patch.
> >>
> >> Yeah, as I explained (from commit) it is moved, updated, and written
> >> the plat code. How about this head?
> >>
> >> "drm: bridge: Add Samsung DSIM bridge (Split from exynos_drm_dsi)"
> >
> > I disagree with the "Add" part of the Subject, but I'll wait for
> > others' opinion here.
>
> Maybe something like a "Generalize Exynos-DSI DRM driver into a generic
> Samsung DSIM bridge"?

I agreed.

Jagan.

Re: [git pull] drm fixes for 6.1-rc1

2022-10-17 Thread Arthur Marsh

Thanks Arunpravin, your patch applied to the 6.1-rc1 code built a kernel that 
loaded the amdgpu module on my pc with Cape Verde GPU card with no problems.

Regards,

Arthur. 

On 18 October 2022 7:10:45 am ACDT, Arunpravin Paneer Selvam 
 wrote:
>Hi Christian,
>
>Looks like we have to exit the loop if there are no blocks to compare.
>May be that's why the function returns false.
>
>@Arthur Marsh Could you please test the attached patch.
>
>Thanks,
>Arun
>
>On 10/17/2022 1:39 PM, Christian König wrote:
>> Am 17.10.22 um 10:01 schrieb Dave Airlie:
>>> On Mon, 17 Oct 2022 at 17:07, Christian König  
>>> wrote:
 Hi Arun,
 
 the hw generation doesn't matter. This error message here:
 
 amdgpu: Move buffer fallback to memcpy unavailable
 
 indicates that the detection of linear buffers still doesn't work as
 expected or that we have a bug somewhere else.
 
 Maybe the limiting when SDMA moves are not available isn't working
 correctly?
>>> It is a CAPE_VERDE, so maybe something with the SI UVD memory limitations?
>> 
>> Yeah, good point. Could be that we try to move something into the UVD memory 
>> window and that something isn't allocated linearly.
>> 
>> Arun can you trace the allocation and make sure that all kernel allocations 
>> have the CONTIGUOUS flag set?
>> 
>> Thanks,
>> Christian.
>> 
>>> 
>>> Dave.
>> 

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

[PATCH] drm/amdkfd: Fix memory leak in kfd_mem_dmamap_userptr()

2022-10-17 Thread Rafael Mendonca

If the number of pages from the userptr BO differs from the SG BO then the
allocated memory for the SG table doesn't get freed before returning
-EINVAL, which may lead to a memory leak in some error paths. Fix this by
checking the number of pages before allocating memory for the SG table.

Fixes: 264fb4d332f5 ("drm/amdgpu: Add multi-GPU DMA mapping helpers")
Signed-off-by: Rafael Mendonca 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 978d3970b5cc..84f44f7e4111 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -510,13 +510,13 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
struct ttm_tt *ttm = bo->tbo.ttm;
int ret;
 
+   if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+   return -EINVAL;
+
ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
if (unlikely(!ttm->sg))
return -ENOMEM;
 
-   if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
-   return -EINVAL;
-
/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
ttm->num_pages, 0,
-- 
2.34.1

答复: [PATCH] drm/amdkfd: use vma_lookup() instead of find_vma()

2022-10-17 Thread 王德明

Hi,
The function vma_lookup is shown below. The VMA validity check is included in it.
Do you have any other questions?

static inline
struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
 {
 struct vm_area_struct *vma = find_vma(mm, addr);

 if (vma && addr < vma->vm_start)
 vma = NULL;

 return vma;
 }


> from: Felix Kuehling 
> time: 2022年10月18日 3:35
> to: tomorrow Wang (王德明) ;
> airl...@gmail.com; dan...@ffwll.ch; alexander.deuc...@amd.com;
> christian.koe...@amd.com; xinhui@amd.com
> linux-ker...@vger.kernel.org
> sub: Re: [PATCH] drm/amdkfd: use vma_lookup() instead of find_vma()
> 
> 
> On 2022-10-06 22:48, Deming Wang wrote:
> > Using vma_lookup() verifies the start address is contained in the
> > found vma.  This makes the code easier to read.
> 
> Thank you for the patches. This and your other patch look good to me.
> However, you missed one use of find_vma in svm_range_is_valid. Is that an
> oversight or is there a reason why we need to use find_vma there?
> 
> If you're going to respin it, you may also squash the two patches into one.
> 
> Thanks,
>Felix
> 
> 
> >
> > Signed-off-by: Deming Wang 
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 ++--
> >   1 file changed, 6 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > index 64fdf63093a0..cabcc2ca3c23 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> > @@ -1586,8 +1586,8 @@ static int svm_range_validate_and_map(struct
> mm_struct *mm,
> > unsigned long npages;
> > bool readonly;
> >
> > -   vma = find_vma(mm, addr);
> > -   if (!vma || addr < vma->vm_start) {
> > +   vma = vma_lookup(mm, addr);
> > +   if (!vma) {
> > r = -EFAULT;
> > goto unreserve_out;
> > }
> > @@ -2542,8 +2542,8 @@ svm_range_get_range_boundaries(struct
> kfd_process *p, int64_t addr,
> > struct interval_tree_node *node;
> > unsigned long start_limit, end_limit;
> >
> > -   vma = find_vma(p->mm, addr << PAGE_SHIFT);
> > -   if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> > +   vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
> > +   if (!vma) {
> > pr_debug("VMA does not exist in address [0x%llx]\n", addr);
> > return -EFAULT;
> > }
> > @@ -2871,8 +2871,8 @@ svm_range_restore_pages(struct amdgpu_device
> *adev, unsigned int pasid,
> > /* __do_munmap removed VMA, return success as we are handling stale
> >  * retry fault.
> >  */
> > -   vma = find_vma(mm, addr << PAGE_SHIFT);
> > -   if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> > +   vma = vma_lookup(mm, addr << PAGE_SHIFT);
> > +   if (!vma) {
> > pr_debug("address 0x%llx VMA is removed\n", addr);
> > r = 0;
> > goto out_unlock_range;


smime.p7s
Description: S/MIME cryptographic signature

RE: [PATCH] drm/amdgpu/powerplay/psm: Fix memory leak in power state init

2022-10-17 Thread Quan, Evan

[AMD Official Use Only - General]

Reviewed-by: Evan Quan 

> -Original Message-
> From: Rafael Mendonca 
> Sent: Tuesday, October 18, 2022 8:54 AM
> To: Quan, Evan ; Deucher, Alexander
> ; Koenig, Christian
> ; Pan, Xinhui ; David
> Airlie ; Daniel Vetter 
> Cc: Rafael Mendonca ; amd-
> g...@lists.freedesktop.org; dri-devel@lists.freedesktop.org; linux-
> ker...@vger.kernel.org
> Subject: [PATCH] drm/amdgpu/powerplay/psm: Fix memory leak in power
> state init
> 
> Commit 902bc65de0b3 ("drm/amdgpu/powerplay/psm: return an error in
> power state init") made the power state init function return early in case of
> failure to get an entry from the powerplay table, but it missed to clean up 
> the
> allocated memory for the current power state before returning.
> 
> Fixes: 902bc65de0b3 ("drm/amdgpu/powerplay/psm: return an error in
> power state init")
> Signed-off-by: Rafael Mendonca 
> ---
>  drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
> index 67d7da0b6fed..1d829402cd2e 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
> @@ -75,8 +75,10 @@ int psm_init_power_state_table(struct pp_hwmgr
> *hwmgr)
>   for (i = 0; i < table_entries; i++) {
>   result = hwmgr->hwmgr_func->get_pp_table_entry(hwmgr,
> i, state);
>   if (result) {
> + kfree(hwmgr->current_ps);
>   kfree(hwmgr->request_ps);
>   kfree(hwmgr->ps);
> + hwmgr->current_ps = NULL;
>   hwmgr->request_ps = NULL;
>   hwmgr->ps = NULL;
>   return -EINVAL;
> --
> 2.34.1

[PATCH] drm/amdgpu/powerplay/psm: Fix memory leak in power state init

2022-10-17 Thread Rafael Mendonca

Commit 902bc65de0b3 ("drm/amdgpu/powerplay/psm: return an error in power
state init") made the power state init function return early in case of
failure to get an entry from the powerplay table, but it failed to clean up
the allocated memory for the current power state before returning.

Fixes: 902bc65de0b3 ("drm/amdgpu/powerplay/psm: return an error in power state 
init")
Signed-off-by: Rafael Mendonca 
---
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
index 67d7da0b6fed..1d829402cd2e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
@@ -75,8 +75,10 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr)
for (i = 0; i < table_entries; i++) {
result = hwmgr->hwmgr_func->get_pp_table_entry(hwmgr, i, state);
if (result) {
+   kfree(hwmgr->current_ps);
kfree(hwmgr->request_ps);
kfree(hwmgr->ps);
+   hwmgr->current_ps = NULL;
hwmgr->request_ps = NULL;
hwmgr->ps = NULL;
return -EINVAL;
-- 
2.34.1

Re: [PATCH v2 1/2] drm/i915: Add intel_ prefix to struct ip_version

2022-10-17 Thread Lucas De Marchi


On Mon, Oct 17, 2022 at 10:29:56AM -0700, Lucas De Marchi wrote:

On Tue, Oct 11, 2022 at 08:38:50AM -0700, Radhakrishna Sripada wrote:

Rename struct ip_version to intel_ip_version to comply with the
naming conventions for structures.

Suggested-by: Jani Nikula 
Signed-off-by: Radhakrishna Sripada 



Reviewed-by: Lucas De Marchi 


both patches pushed,

thanks
Lucas De Marchi

Re: [PATCH v5 4/4] drm/i915: Improve long running compute w/a for GuC submission

2022-10-17 Thread Ceraolo Spurio, Daniele





On 10/6/2022 2:38 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

A workaround was added to the driver to allow compute workloads to run
'forever' by disabling pre-emption on the RCS engine for Gen12.
It is not totally unbound as the heartbeat will kick in eventually
and cause a reset of the hung engine.

However, this does not work well in GuC submission mode. In GuC mode,
the pre-emption timeout is how GuC detects hung contexts and triggers
a per engine reset. Thus, disabling the timeout means also losing all
per engine reset ability. A full GT reset will still occur when the
heartbeat finally expires, but that is a much more destructive and
undesirable mechanism.

The purpose of the workaround is actually to give compute tasks longer
to reach a pre-emption point after a pre-emption request has been
issued. This is necessary because Gen12 does not support mid-thread
pre-emption and compute tasks can have long running threads.

So, rather than disabling the timeout completely, just set it to a
'long' value.

v2: Review feedback from Tvrtko - must hard code the 'long' value
instead of determining it algorithmically. So make it an extra CONFIG
definition. Also, remove the execlist centric comment from the
existing pre-emption timeout CONFIG option given that it applies to
more than just execlists.

Signed-off-by: John Harrison 
Reviewed-by: Daniele Ceraolo Spurio  (v1)


r-b stands.

Daniele


Acked-by: Michal Mrozek 
Acked-by: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/Kconfig.profile  | 26 +++
  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  9 ++--
  2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile 
b/drivers/gpu/drm/i915/Kconfig.profile
index 39328567c2007..7cc38d25ee5c8 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT
default 640 # milliseconds
help
  How long to wait (in milliseconds) for a preemption event to occur
- when submitting a new context via execlists. If the current context
- does not hit an arbitration point and yield to HW before the timer
- expires, the HW will be reset to allow the more important context
- to execute.
+ when submitting a new context. If the current context does not hit
+ an arbitration point and yield to HW before the timer expires, the
+ HW will be reset to allow the more important context to execute.
+
+ This is adjustable via
+ /sys/class/drm/card?/engine/*/preempt_timeout_ms
+
+ May be 0 to disable the timeout.
+
+ The compiled in default may get overridden at driver probe time on
+ certain platforms and certain engines which will be reflected in the
+ sysfs control.
+
+config DRM_I915_PREEMPT_TIMEOUT_COMPUTE
+   int "Preempt timeout for compute engines (ms, jiffy granularity)"
+   default 7500 # milliseconds
+   help
+ How long to wait (in milliseconds) for a preemption event to occur
+ when submitting a new context to a compute capable engine. If the
+ current context does not hit an arbitration point and yield to HW
+ before the timer expires, the HW will be reset to allow the more
+ important context to execute.
  
  	  This is adjustable via

  /sys/class/drm/card?/engine/*/preempt_timeout_ms
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index fcbccd8d244e9..c1257723d1949 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -508,9 +508,14 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id,
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
  
-	/* Override to uninterruptible for OpenCL workloads. */

+   /*
+* Mid-thread pre-emption is not available in Gen12. Unfortunately,
+* some compute workloads run quite long threads. That means they get
+* reset due to not pre-empting in a timely manner. So, bump the
+* pre-emption timeout value to be much higher for compute engines.
+*/
if (GRAPHICS_VER(i915) == 12 && (engine->flags & 
I915_ENGINE_HAS_RCS_REG_STATE))
-   engine->props.preempt_timeout_ms = 0;
+   engine->props.preempt_timeout_ms = 
CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
  
  	/* Cap properties according to any system limits */

  #define CLAMP_PROP(field) \

Re: [PATCH v5 1/4] drm/i915/guc: Limit scheduling properties to avoid overflow

2022-10-17 Thread Ceraolo Spurio, Daniele





On 10/6/2022 2:38 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

GuC converts the pre-emption timeout and timeslice quantum values into
clock ticks internally. That significantly reduces the point of 32bit
overflow. On current platforms, worst case scenario is approximately
110 seconds. Rather than allowing the user to set higher values and
then get confused by early timeouts, add limits when setting these
values.

v2: Add helper functions for clamping (review feedback from Tvrtko).
v3: Add a bunch of BUG_ON range checks in addition to the checks
already in the clamping functions (Tvrtko)

Signed-off-by: John Harrison 
Reviewed-by: Daniele Ceraolo Spurio  (v1)


r-b stands

Daniele


Acked-by: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/gt/intel_engine.h|  6 ++
  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 69 +++
  drivers/gpu/drm/i915/gt/sysfs_engines.c   | 25 ---
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   | 21 ++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  8 +++
  5 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index 04e435bce79bd..cbc8b857d5f7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs 
*engine)
return engine->hung_ce;
  }
  
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value);

+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 
value);
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 
value);
+
  #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 2ddcad497fa30..8f16955f0821e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -512,6 +512,26 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id,
engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
}
  
+	/* Cap properties according to any system limits */

+#define CLAMP_PROP(field) \
+   do { \
+   u64 clamp = intel_clamp_##field(engine, engine->props.field); \
+   if (clamp != engine->props.field) { \
+   drm_notice(>i915->drm, \
+  "Warning, clamping %s to %lld to prevent 
overflow\n", \
+  #field, clamp); \
+   engine->props.field = clamp; \
+   } \
+   } while (0)
+
+   CLAMP_PROP(heartbeat_interval_ms);
+   CLAMP_PROP(max_busywait_duration_ns);
+   CLAMP_PROP(preempt_timeout_ms);
+   CLAMP_PROP(stop_timeout_ms);
+   CLAMP_PROP(timeslice_duration_ms);
+
+#undef CLAMP_PROP
+
engine->defaults = engine->props; /* never to change again */
  
  	engine->context_size = intel_engine_context_size(gt, engine->class);

@@ -534,6 +554,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id,
return 0;
  }
  
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)

+{
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 
value)
+{
+   value = min(value, jiffies_to_nsecs(2));
+
+   return value;
+}
+
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+   /*
+* NB: The GuC API only supports 32bit values. However, the limit is 
further
+* reduced due to internal calculations which would otherwise overflow.
+*/
+   if (intel_guc_submission_is_wanted(>gt->uc.guc))
+   value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
+
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 
value)
+{
+   /*
+* NB: The GuC API only supports 32bit values. However, the limit is 
further
+* reduced due to internal calculations which would otherwise overflow.
+*/
+   if (intel_guc_submission_is_wanted(>gt->uc.guc))
+   value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
+
+   value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+
+   return value;
+}
+
  static void __setup_engine_capabilities(struct intel_engine_cs *engine)
  {
struct

Re: [PATCH v2 3/7] drm/i915/uc: use different ggtt pin offsets for uc loads

2022-10-17 Thread John Harrison


On 10/12/2022 17:03, Daniele Ceraolo Spurio wrote:

Our current FW loading process is the same for all FWs:

- Pin FW to GGTT at the start of the ggtt->uc_fw node
- Load the FW
- Unpin

This worked because we didn't have a case where 2 FWs would be loaded on
the same GGTT at the same time. On MTL, however, this can happen if both
GTs are reset at the same time, so we can't pin everything in the same
spot and we need to use separate offset. For simplicity, instead of
calculating the exact required size, we reserve a 2MB slot for each fw.

v2: fail fetch if FW is > 2MBs, improve comments (John)

Signed-off-by: Daniele Ceraolo Spurio 
Cc: John Harrison 
Cc: Alan Previn 
---
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 30 +---
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 13 ++
  2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index de2843dc1307..021290a26195 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -575,6 +575,17 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
err = firmware_request_nowarn(, uc_fw->file_selected.path, dev);
memcpy(_ideal, _fw->file_wanted, sizeof(file_ideal));
  
+	if (!err && fw->size > INTEL_UC_RSVD_GGTT_PER_FW) {

+   drm_err(>drm,
+   "%s firmware %s: size (%zuKB) exceeds max supported size 
(%uKB)\n",
+   intel_uc_fw_type_repr(uc_fw->type), 
uc_fw->file_selected.path,
+   fw->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K);
+
+   /* try to find another blob to load */
+   release_firmware(fw);
+   err = -ENOENT;
+   }
+
/* Any error is terminal if overriding. Don't bother searching for 
older versions */
if (err && intel_uc_fw_is_overridden(uc_fw))
goto fail;
@@ -677,14 +688,28 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
  
  static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw)

  {
-   struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
+   struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
+   struct i915_ggtt *ggtt = gt->ggtt;
struct drm_mm_node *node = >uc_fw;
+   u32 offset = uc_fw->type * INTEL_UC_RSVD_GGTT_PER_FW;
+
+   /*
+* To keep the math simple, we use 8MB for the root tile and 8MB for
+* the media one. This will need to be updated if we ever have more
+* than 1 media GT
+*/
+   BUILD_BUG_ON(INTEL_UC_FW_NUM_TYPES * INTEL_UC_RSVD_GGTT_PER_FW > SZ_8M);
+   GEM_BUG_ON(gt->type == GT_MEDIA && gt->info.id > 1);
+   if (gt->type == GT_MEDIA)
+   offset += SZ_8M;
This is all because render/media GTs share the same page tables? Regular 
multi-tile is completely separate address spaces and can use a single 
common address? Otherwise, it seems like 'offset = gt->info.id * 2M' 
would be the generic solution and no reference to GT_MEDIA required. So 
maybe add a quick comment to that effect?



  
  	GEM_BUG_ON(!drm_mm_node_allocated(node));

GEM_BUG_ON(upper_32_bits(node->start));
GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
+   GEM_BUG_ON(offset + uc_fw->obj->base.size > node->size);
+   GEM_BUG_ON(uc_fw->obj->base.size > INTEL_UC_RSVD_GGTT_PER_FW);
  
-	return lower_32_bits(node->start);

+   return lower_32_bits(node->start + offset);
  }
  
  static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)

@@ -699,7 +724,6 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
dummy->bi.pages = obj->mm.pages;
  
  	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

-   GEM_BUG_ON(dummy->node_size > ggtt->uc_fw.size);
  
  	/* uc_fw->obj cache domains were not controlled across suspend */

if (i915_gem_object_has_struct_page(obj))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index cb586f7df270..7b3db41efa6e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -6,6 +6,7 @@
  #ifndef _INTEL_UC_FW_H_
  #define _INTEL_UC_FW_H_
  
+#include 

  #include 
  #include "intel_uc_fw_abi.h"
  #include "intel_device_info.h"
@@ -114,6 +115,18 @@ struct intel_uc_fw {
 
(uc)->fw.file_selected.minor_ver, \
 
(uc)->fw.file_selected.patch_ver))
  
+/*

+ * When we load the uC binaries, we pin them in a reserved section at the top 
of
+ * the GGTT, which is ~18 MBs. On multi-GT systems where the GTs share the 
GGTT,
^^^ meaning only systems with a render GT + media GT as opposed to 
regular multi-GT systems? Would be good to make that explicit either 
here or above (or both).


John.


+ * we also need to make sure that each binary is pinned to a unique location
+ * during load, because the different GT

Re: [PATCH v7 00/21] Move all drivers to a common dma-buf locking convention

2022-10-17 Thread Dmitry Osipenko

On 10/17/22 20:22, Dmitry Osipenko wrote:
> Hello,
> 
> This series moves all drivers to a dynamic dma-buf locking specification.
> From now on all dma-buf importers are made responsible for holding
> dma-buf's reservation lock around all operations performed over dma-bufs
> in accordance with the locking specification. This allows us to utilize the
> reservation lock more broadly around the kernel without fear of potential
> deadlocks.
> 
> This patchset passes all i915 selftests. It was also tested using VirtIO,
> Panfrost, Lima, Tegra, udmabuf, AMDGPU and Nouveau drivers. I tested cases
> of display+GPU, display+V4L and GPU+V4L dma-buf sharing (where appropriate),
> which covers majority of kernel drivers since rest of the drivers share
> same or similar code paths.
> 
> Changelog:
> 
> v7: - Rebased on top of recent drm-misc-next.
> 
> - Added ack from Jason Gunthorpe to the RDMA patch.
> 
> - Added iosys_map_clear() to dma_buf_vmap_unlocked(), making it fully
>   consistent with dma_buf_vmap().
> 
> v6: - Added r-b from Michael Ruhl to the i915 patch.
> 
> - Added acks from Sumit Semwal and updated commit message of the
>   "Move dma_buf_vmap() to dynamic locking specification" patch like
>   was suggested by Sumit.
> 
> - Added "!dmabuf" check to dma_buf_vmap_unlocked() to match the locked
>   variant of the function, for consistency.
> 
> v5: - Added acks and r-bs that were given to v4.
> 
> - Changed i915 preparation patch like was suggested by Michael Ruhl.
>   The scope of reservation locking is smaller now.
> 
> v4: - Added dma_buf_mmap() to the "locking convention" documentation,
>   which was missed by accident in v3.
> 
> - Added acks from Christian König, Tomasz Figa and Hans Verkuil that
>   they gave to couple v3 patches.
> 
> - Dropped the "_unlocked" postfix from function names that don't have
>   the locked variant, as was requested by Christian König.
> 
> - Factored out the per-driver preparations into separate patches
>   to ease reviewing of the changes, which is now doable without the
>   global dma-buf functions renaming.
> 
> - Factored out the dynamic locking convention enforcements into separate
>   patches which add the final dma_resv_assert_held(dmabuf->resv) to the
>   dma-buf API functions.
> 
> v3: - Factored out dma_buf_mmap_unlocked() and attachment functions
>   into separate patches, like was suggested by Christian König.
> 
> - Corrected and factored out dma-buf locking documentation into
>   a separate patch, like was suggested by Christian König.
> 
> - Intel driver dropped the reservation locking a few days ago from
>   its BO-release code path, but we need that locking for the imported
>   GEMs because in the end that code path unmaps the imported GEM.
>   So I added back the locking needed by the imported GEMs, updating
>   the "dma-buf attachment locking specification" patch appropriately.
> 
> - Tested Nouveau+Intel dma-buf import/export combo.
> 
> - Tested udmabuf import to i915/Nouveau/AMDGPU.
> 
> - Fixed a few places in Etnaviv, Panfrost and Lima drivers that I missed
>   to switch to locked dma-buf vmapping in the "drm/gem: Take reservation
>   lock for vmap/vunmap operations" patch. As a result, this invalidated
>   Christian's r-b that he gave to v2.
> 
> - Added locked dma-buf vmap/vunmap functions that are needed for fixing
>   vmapping of Etnaviv, Panfrost and Lima drivers mentioned above.
>   I actually had this change stashed for the drm-shmem shrinker patchset,
>   but then realized that it's already needed by the dma-buf patches.
>   Also improved my tests to better cover these code paths.
> 
> v2: - Changed locking specification to avoid problems with a cross-driver
>   ww locking, like was suggested by Christian König. Now the attach/detach
>   callbacks are invoked without the held lock and exporter should take the
>   lock.
> 
> - Added "locking convention" documentation that explains which dma-buf
>   functions and callbacks are locked/unlocked for importers and exporters,
>   which was requested by Christian König.
> 
> - Added ack from Tomasz Figa to the V4L patches that he gave to v1.
> 
> Dmitry Osipenko (21):
>   dma-buf: Add unlocked variant of vmapping functions
>   dma-buf: Add unlocked variant of attachment-mapping functions
>   drm/gem: Take reservation lock for vmap/vunmap operations
>   drm/prime: Prepare to dynamic dma-buf locking specification
>   drm/armada: Prepare to dynamic dma-buf locking specification
>   drm/i915: Prepare to dynamic dma-buf locking specification
>   drm/omapdrm: Prepare to dynamic dma-buf locking specification
>   drm/tegra: Prepare to dynamic dma-buf locking specification
>   drm/etnaviv: Prepare to dynamic dma-buf locking specification
>   RDMA/umem: Prepare to dynamic dma-buf locking specification
>   misc: fastrpc: Prepare

Re: [Bug][5.18-rc0] Between commits ed4643521e6a and 34af78c4e616, appears warning "WARNING: CPU: 31 PID: 51848 at drivers/dma-buf/dma-fence-array.c:191 dma_fence_array_create+0x101/0x120" and some ga

2022-10-17 Thread Mikhail Gavrilov

On Wed, May 11, 2022 at 5:01 PM Christian König
 wrote:
>
>
> We have implemented a workaround, but still don't know the exact root cause.
>
> If anybody wants to look into this it would be rather helpful to be able
> to reproduce the issue.
>
> Regards,
> Christian.

I see that the issue has returned after this commit:
dd80d9c8eecac8c516da5b240d01a35660ba6cb6 is the first bad commit
commit dd80d9c8eecac8c516da5b240d01a35660ba6cb6
Author: Christian König 
Date:   Thu Jul 14 10:23:38 2022 +0200

drm/amdgpu: revert "partial revert "remove ctx->lock" v2"

This reverts commit 94f4c4965e5513ba624488f4b601d6b385635aec.

We found that the bo_list is missing a protection for its list entries.
Since that is fixed now this workaround can be removed again.

Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
Signed-off-by: Alex Deucher 

 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 21 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  2 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 -
 3 files changed, 6 insertions(+), 18 deletions(-)

The games Forza Horizon 4 and Cyberpunk 2077 again hang at start.


-- 
Best Regards,
Mike Gavrilov.

linux-next: manual merge of the drm-intel tree with Linus' tree

2022-10-17 Thread Stephen Rothwell

Hi all,

Today's linux-next merge of the drm-intel tree got a conflict in:

  drivers/gpu/drm/i915/i915_driver.c

between commit:

  1c66a12ab431 ("drm/i915: Handle each GT on init/release and suspend/resume")

from Linus' tree and commit:

  3703060d17b0 ("drm/i915/display: remove drm_device aliases")

from the drm-intel tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/gpu/drm/i915/i915_driver.c
index c459eb362c47,e7b2ebc6b88d..
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@@ -337,10 -324,11 +337,11 @@@ static int i915_driver_early_probe(stru
if (i915_inject_probe_failure(dev_priv))
return -ENODEV;
  
-   intel_device_info_subplatform_init(dev_priv);
+   intel_device_info_runtime_init_early(dev_priv);
+ 
intel_step_init(dev_priv);
  
 -  intel_uncore_mmio_debug_init_early(_priv->mmio_debug);
 +  intel_uncore_mmio_debug_init_early(dev_priv);
  
spin_lock_init(_priv->irq_lock);
spin_lock_init(_priv->gpu_error.lock);
@@@ -738,10 -716,6 +739,9 @@@ static void i915_driver_hw_remove(struc
   */
  static void i915_driver_register(struct drm_i915_private *dev_priv)
  {
-   struct drm_device *dev = _priv->drm;
 +  struct intel_gt *gt;
 +  unsigned int i;
 +
i915_gem_driver_register(dev_priv);
i915_pmu_register(dev_priv);
  


pgpCtTw4v4ha1.pgp
Description: OpenPGP digital signature

Re: [git pull] drm fixes for 6.1-rc1

2022-10-17 Thread Arunpravin Paneer Selvam


Hi Christian,

Looks like we have to exit the loop if there are no blocks to compare.
Maybe that's why the function returns false.

@Arthur Marsh Could you please test the attached patch.

Thanks,
Arun

On 10/17/2022 1:39 PM, Christian König wrote:

Am 17.10.22 um 10:01 schrieb Dave Airlie:
On Mon, 17 Oct 2022 at 17:07, Christian König 
 wrote:

Hi Arun,

the hw generation doesn't matter. This error message here:

amdgpu: Move buffer fallback to memcpy unavailable

indicates that the detection of linear buffers still doesn't work as
expected or that we have a bug somewhere else.

Maybe the limiting when SDMA moves are not available isn't working
correctly?
It is a CAPE_VERDE, so maybe something with the SI UVD memory 
limitations?


Yeah, good point. Could be that we try to move something into the UVD 
memory window and that something isn't allocated linearly.


Arun can you trace the allocation and make sure that all kernel 
allocations have the CONTIGUOUS flag set?


Thanks,
Christian.



Dave.


From 132ce83f893eaea743fb7f41a9dc72afea52cdaa Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam 
Date: Mon, 17 Oct 2022 13:15:21 -0700
Subject: [PATCH] drm/amdgpu: Fix for BO move issue

If there are no blocks to compare then exit
the loop.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dc262d2c2925..57277b1cf183 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
while (cursor.remaining) {
amdgpu_res_next(, cursor.size);
 
+   if (!cursor.remaining)
+   break;
+
/* ttm_resource_ioremap only supports contiguous memory */
if (end != cursor.start)
return false;
-- 
2.25.1

Re: [PATCH 3/3] drm/i915/mtl: C6 residency and C state type for MTL SAMedia

2022-10-17 Thread Dixit, Ashutosh

On Fri, 14 Oct 2022 20:26:18 -0700, Ashutosh Dixit wrote:
>
> From: Badal Nilawar 

Hi Badal,

One question below.

> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> index 1fb053cbf52db..3a9bb4387248e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> @@ -256,6 +256,61 @@ static int ilk_drpc(struct seq_file *m)
>   return 0;
>  }
>
> +static int mtl_drpc(struct seq_file *m)
> +{

Here we have:

> + global_forcewake = intel_uncore_read(uncore, FORCEWAKE_GT_GEN9);
and
> + seq_printf(m, "Global Forcewake Requests: 0x%x\n", global_forcewake);

In gen6_drpc we have:

mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
and
seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req);

Also:
#define FORCEWAKE_MT        _MMIO(0xa188)
#define FORCEWAKE_GT_GEN9   _MMIO(0xa188)

So they are both the same register. So what is the reason for this
difference, which one should we use?

Also let's have the prints in the same order as gen6_drpc (move fw request
before rc6 residency).

Thanks.
--
Ashutosh

[PATCH 13/16] drm/vmwgfx: Port the framebuffer code to drm fb helpers

2022-10-17 Thread Zack Rusin

From: Zack Rusin 

Instead of using vmwgfx specific framebuffer implementation use the drm
fb helpers. There's no change in functionality, the only difference
is a reduction in the amount of code inside the vmwgfx module.

drm fb helpers do not deal correctly with changes in crtc preferred mode
at runtime, but the old fb code wasn't dealing with it either.
Same situation applies to high-res fb consoles - the old code was
limited to 1176x885 because it was checking for legacy/deprecated
memory limits, the drm fb helpers are limited to the initial resolution
set on fb due to the first problem (drm fb helpers being unable to handle
hotplug crtc preferred mode changes).

This also removes the kernel config for disabling fb support which hasn't
been used or supported in a very long time.

Signed-off-by: Zack Rusin 
Reviewed-by: Maaz Mombasawala 
Reviewed-by: Martin Krastev 
---
 drivers/gpu/drm/vmwgfx/Kconfig  |   7 -
 drivers/gpu/drm/vmwgfx/Makefile |   2 -
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  58 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |  35 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c  | 831 
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c |  77 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h |   7 -
 7 files changed, 26 insertions(+), 991 deletions(-)
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c

diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index a4fabe208d9f..faddae3d6ac2 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -16,13 +16,6 @@ config DRM_VMWGFX
  virtual hardware.
  The compiled module will be called "vmwgfx.ko".
 
-config DRM_VMWGFX_FBCON
-   depends on DRM_VMWGFX && DRM_FBDEV_EMULATION
-   bool "Enable framebuffer console under vmwgfx by default"
-   help
-  Choose this option if you are shipping a new vmwgfx
-  userspace driver that supports using the kernel driver.
-
 config DRM_VMWGFX_MKSSTATS
bool "Enable mksGuestStats instrumentation of vmwgfx by default"
depends on DRM_VMWGFX
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 68e350f410ad..2a644f035597 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -12,6 +12,4 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o 
vmwgfx_drv.o \
vmwgfx_devcaps.o ttm_object.o vmwgfx_system_manager.o \
vmwgfx_gem.o
 
-vmwgfx-$(CONFIG_DRM_FBDEV_EMULATION) += vmwgfx_fb.o
-
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index b909a3ce9af3..df7496b74da5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -35,6 +35,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -52,9 +53,6 @@
 
 #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
 
-#define VMW_MIN_INITIAL_WIDTH 800
-#define VMW_MIN_INITIAL_HEIGHT 600
-
 /*
  * Fully encoded drm commands. Might move to vmw_drm.h
  */
@@ -265,7 +263,6 @@ static const struct pci_device_id vmw_pci_id_list[] = {
 };
 MODULE_DEVICE_TABLE(pci, vmw_pci_id_list);
 
-static int enable_fbdev = IS_ENABLED(CONFIG_DRM_VMWGFX_FBCON);
 static int vmw_restrict_iommu;
 static int vmw_force_coherent;
 static int vmw_restrict_dma_mask;
@@ -275,8 +272,6 @@ static int vmw_probe(struct pci_dev *, const struct 
pci_device_id *);
 static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
  void *ptr);
 
-MODULE_PARM_DESC(enable_fbdev, "Enable vmwgfx fbdev");
-module_param_named(enable_fbdev, enable_fbdev, int, 0600);
 MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage for TTM pages");
 module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600);
 MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
@@ -626,8 +621,8 @@ static void vmw_get_initial_size(struct vmw_private 
*dev_priv)
width = vmw_read(dev_priv, SVGA_REG_WIDTH);
height = vmw_read(dev_priv, SVGA_REG_HEIGHT);
 
-   width = max_t(uint32_t, width, VMW_MIN_INITIAL_WIDTH);
-   height = max_t(uint32_t, height, VMW_MIN_INITIAL_HEIGHT);
+   width = max_t(uint32_t, width, VMWGFX_MIN_INITIAL_WIDTH);
+   height = max_t(uint32_t, height, VMWGFX_MIN_INITIAL_HEIGHT);
 
if (width > dev_priv->fb_max_width ||
height > dev_priv->fb_max_height) {
@@ -636,8 +631,8 @@ static void vmw_get_initial_size(struct vmw_private 
*dev_priv)
 * This is a host error and shouldn't occur.
 */
 
-   width = VMW_MIN_INITIAL_WIDTH;
-   height = VMW_MIN_INITIAL_HEIGHT;
+   width  = VMWGFX_MIN_INITIAL_WIDTH;
+   height = VMWGFX_MIN_INITIAL_HEIGHT;
}
 
dev_priv->initial_width = width;
@@ -886,9 +881,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, 
u32 pci_id)
 
dev_priv->assume_16bpp

[PATCH 10/16] drm/vmwgfx: Refactor ttm reference object hashtable to use linux/hashtable.

2022-10-17 Thread Zack Rusin

From: Maaz Mombasawala 

This is part of an effort to move from the vmwgfx_open_hash hashtable to
linux/hashtable implementation.
Refactor the ref_hash hashtable, used for fast lookup of reference objects
associated with a ttm file.
This also exposed a problem related to inconsistently using 32-bit and
64-bit keys with this hashtable. The hash function used changes depending
on the size of the type, and results are not consistent across numbers,
for example, hash_32(329) = 329, but hash_long(329) = 328. This would
cause the lookup to fail for objects already in the hashtable, since keys
of different sizes were being passed during adding and lookup. This was
not an issue before because vmwgfx_open_hash always used hash_long.
Fix this by always using 64-bit keys for this hashtable, which means that
hash_long is always used.

Signed-off-by: Maaz Mombasawala 
Reviewed-by: Zack Rusin 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/ttm_object.c | 91 -
 drivers/gpu/drm/vmwgfx/ttm_object.h | 12 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 3 files changed, 56 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c 
b/drivers/gpu/drm/vmwgfx/ttm_object.c
index 9546b121bc22..c07b81fbc495 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.c
@@ -52,9 +52,12 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_IMPORT_NS(DMA_BUF);
 
+#define VMW_TTM_OBJECT_REF_HT_ORDER 10
+
 /**
  * struct ttm_object_file
  *
@@ -75,7 +78,7 @@ struct ttm_object_file {
struct ttm_object_device *tdev;
spinlock_t lock;
struct list_head ref_list;
-   struct vmwgfx_open_hash ref_hash;
+   DECLARE_HASHTABLE(ref_hash, VMW_TTM_OBJECT_REF_HT_ORDER);
struct kref refcount;
 };
 
@@ -136,6 +139,36 @@ ttm_object_file_ref(struct ttm_object_file *tfile)
return tfile;
 }
 
+static int ttm_tfile_find_ref_rcu(struct ttm_object_file *tfile,
+ uint64_t key,
+ struct vmwgfx_hash_item **p_hash)
+{
+   struct vmwgfx_hash_item *hash;
+
+   hash_for_each_possible_rcu(tfile->ref_hash, hash, head, key) {
+   if (hash->key == key) {
+   *p_hash = hash;
+   return 0;
+   }
+   }
+   return -EINVAL;
+}
+
+static int ttm_tfile_find_ref(struct ttm_object_file *tfile,
+ uint64_t key,
+ struct vmwgfx_hash_item **p_hash)
+{
+   struct vmwgfx_hash_item *hash;
+
+   hash_for_each_possible(tfile->ref_hash, hash, head, key) {
+   if (hash->key == key) {
+   *p_hash = hash;
+   return 0;
+   }
+   }
+   return -EINVAL;
+}
+
 static void ttm_object_file_destroy(struct kref *kref)
 {
struct ttm_object_file *tfile =
@@ -238,14 +271,13 @@ void ttm_base_object_unref(struct ttm_base_object 
**p_base)
  * Return: A pointer to the object if successful or NULL otherwise.
  */
 struct ttm_base_object *
-ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key)
+ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key)
 {
struct vmwgfx_hash_item *hash;
-   struct vmwgfx_open_hash *ht = >ref_hash;
int ret;
 
rcu_read_lock();
-   ret = vmwgfx_ht_find_item_rcu(ht, key, );
+   ret = ttm_tfile_find_ref_rcu(tfile, key, );
if (ret) {
rcu_read_unlock();
return NULL;
@@ -257,15 +289,14 @@ ttm_base_object_noref_lookup(struct ttm_object_file 
*tfile, uint32_t key)
 EXPORT_SYMBOL(ttm_base_object_noref_lookup);
 
 struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile,
-  uint32_t key)
+  uint64_t key)
 {
struct ttm_base_object *base = NULL;
struct vmwgfx_hash_item *hash;
-   struct vmwgfx_open_hash *ht = >ref_hash;
int ret;
 
rcu_read_lock();
-   ret = vmwgfx_ht_find_item_rcu(ht, key, );
+   ret = ttm_tfile_find_ref_rcu(tfile, key, );
 
if (likely(ret == 0)) {
base = drm_hash_entry(hash, struct ttm_ref_object, hash)->obj;
@@ -278,7 +309,7 @@ struct ttm_base_object *ttm_base_object_lookup(struct 
ttm_object_file *tfile,
 }
 
 struct ttm_base_object *
-ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint32_t key)
+ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint64_t key)
 {
struct ttm_base_object *base;
 
@@ -297,7 +328,6 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
   bool *existed,
   bool require_existed)
 {
-   struct vmwgfx_open_hash *ht = >ref_hash;
struct ttm_ref_object *ref;
struct vmwgfx_hash_item *hash;
int ret = -EINVAL;
@@ -310,7 +340,7 @@ int

[PATCH 11/16] drm/vmwgfx : Remove vmwgfx_hashtab

2022-10-17 Thread Zack Rusin

From: Maaz Mombasawala 

The vmwgfx driver has migrated from using the hashtable in vmwgfx_hashtab
to the linux/hashtable implementation. Remove the vmwgfx_hashtab from the
driver.

Signed-off-by: Maaz Mombasawala 
Reviewed-by: Martin Krastev 
Reviewed-by: Zack Rusin 
Signed-off-by: Zack Rusin 
---
 Documentation/gpu/todo.rst |  11 --
 drivers/gpu/drm/vmwgfx/Makefile|   2 +-
 drivers/gpu/drm/vmwgfx/ttm_object.c|   8 +-
 drivers/gpu/drm/vmwgfx/ttm_object.h|   2 -
 drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h|   6 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c| 199 -
 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h|  83 -
 8 files changed, 12 insertions(+), 303 deletions(-)
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h

diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 393d218e4a0c..b2c6aaf1edf2 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -651,17 +651,6 @@ See drivers/gpu/drm/amd/display/TODO for tasks.
 
 Contact: Harry Wentland, Alex Deucher
 
-vmwgfx: Replace hashtable with Linux' implementation
-
-
-The vmwgfx driver uses its own hashtable implementation. Replace the
-code with Linux' implementation and update the callers. It's mostly a
-refactoring task, but the interfaces are different.
-
-Contact: Zack Rusin, Thomas Zimmermann 
-
-Level: Intermediate
-
 Bootsplash
 ==
 
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index eee73b9aa404..68e350f410ad 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_hashtab.o vmwgfx_kms.o 
vmwgfx_drv.o \
+vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_ttm_buffer.o \
vmwgfx_cmd.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
vmwgfx_overlay.o vmwgfx_gmrid_manager.o vmwgfx_fence.o \
diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c 
b/drivers/gpu/drm/vmwgfx/ttm_object.c
index c07b81fbc495..932b125ebf3d 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.c
@@ -284,7 +284,7 @@ ttm_base_object_noref_lookup(struct ttm_object_file *tfile, 
uint64_t key)
}
 
__release(RCU);
-   return drm_hash_entry(hash, struct ttm_ref_object, hash)->obj;
+   return hlist_entry(hash, struct ttm_ref_object, hash)->obj;
 }
 EXPORT_SYMBOL(ttm_base_object_noref_lookup);
 
@@ -299,7 +299,7 @@ struct ttm_base_object *ttm_base_object_lookup(struct 
ttm_object_file *tfile,
ret = ttm_tfile_find_ref_rcu(tfile, key, );
 
if (likely(ret == 0)) {
-   base = drm_hash_entry(hash, struct ttm_ref_object, hash)->obj;
+   base = hlist_entry(hash, struct ttm_ref_object, hash)->obj;
if (!kref_get_unless_zero(>refcount))
base = NULL;
}
@@ -343,7 +343,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
ret = ttm_tfile_find_ref_rcu(tfile, base->handle, );
 
if (ret == 0) {
-   ref = drm_hash_entry(hash, struct ttm_ref_object, hash);
+   ref = hlist_entry(hash, struct ttm_ref_object, hash);
if (kref_get_unless_zero(>kref)) {
rcu_read_unlock();
break;
@@ -407,7 +407,7 @@ int ttm_ref_object_base_unref(struct ttm_object_file *tfile,
spin_unlock(>lock);
return -EINVAL;
}
-   ref = drm_hash_entry(hash, struct ttm_ref_object, hash);
+   ref = hlist_entry(hash, struct ttm_ref_object, hash);
kref_put(>kref, ttm_ref_object_release);
spin_unlock(>lock);
return 0;
diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h 
b/drivers/gpu/drm/vmwgfx/ttm_object.h
index 67f30d589e27..f0ebbe340ad6 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.h
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.h
@@ -42,8 +42,6 @@
 #include 
 #include 
 
-#include "vmwgfx_hashtab.h"
-
 /**
  * enum ttm_object_type
  *
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
index 142aef686fcd..47bc0b411055 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -88,7 +88,7 @@ vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man,
 
hash_for_each_possible_rcu(man->resources, hash, head, key) {
if (hash->key == key)
-   return drm_hash_entry(hash, struct vmw_cmdbuf_res, 
hash)->res;
+   return hlist_entry(hash, struct vmw_cmdbuf_res, 
hash)->res;
}

[PATCH 14/16] drm/vmwgfx: Remove explicit and broken vblank handling

2022-10-17 Thread Zack Rusin

From: Zack Rusin 

The explicit vblank handling was never finished. The driver never had
the full implementation of vblank and what was there is emulated
by DRM when the driver doesn't pretend to be implementing it itself.

Let DRM handle the vblank emulation and stop pretending the driver is
doing anything special with vblank. In the future it would make sense
to implement helpers for full vblank handling because vkms and
amdgpu_vkms already have that code. Exporting it to common helpers and
having all three drivers share it would make sense (that would be largely
just to allow more of igt to run).

Signed-off-by: Zack Rusin 
Reviewed-by: Maaz Mombasawala 
Reviewed-by: Martin Krastev 
Reviewed-by: Michael Banack 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h  |  3 ---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c  | 34 
 drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c  |  8 ---
 drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 27 --
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 26 -
 5 files changed, 98 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index ad470e54d586..4eb7339dd121 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1208,9 +1208,6 @@ int vmw_kms_write_svga(struct vmw_private *vmw_priv,
 bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
uint32_t pitch,
uint32_t height);
-u32 vmw_get_vblank_counter(struct drm_crtc *crtc);
-int vmw_enable_vblank(struct drm_crtc *crtc);
-void vmw_disable_vblank(struct drm_crtc *crtc);
 int vmw_kms_present(struct vmw_private *dev_priv,
struct drm_file *file_priv,
struct vmw_framebuffer *vfb,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index f9eeb8e17f22..bb6366b89e9a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -31,7 +31,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include "vmwgfx_kms.h"
 
@@ -968,15 +967,6 @@ void vmw_du_crtc_atomic_begin(struct drm_crtc *crtc,
 void vmw_du_crtc_atomic_flush(struct drm_crtc *crtc,
  struct drm_atomic_state *state)
 {
-   struct drm_pending_vblank_event *event = crtc->state->event;
-
-   if (event) {
-   crtc->state->event = NULL;
-
-   spin_lock_irq(>dev->event_lock);
-   drm_crtc_send_vblank_event(crtc, event);
-   spin_unlock_irq(>dev->event_lock);
-   }
 }
 
 
@@ -2292,30 +2282,6 @@ bool vmw_kms_validate_mode_vram(struct vmw_private 
*dev_priv,
 dev_priv->max_primary_mem : dev_priv->vram_size);
 }
 
-
-/*
- * Function called by DRM code called with vbl_lock held.
- */
-u32 vmw_get_vblank_counter(struct drm_crtc *crtc)
-{
-   return 0;
-}
-
-/*
- * Function called by DRM code called with vbl_lock held.
- */
-int vmw_enable_vblank(struct drm_crtc *crtc)
-{
-   return -EINVAL;
-}
-
-/*
- * Function called by DRM code called with vbl_lock held.
- */
-void vmw_disable_vblank(struct drm_crtc *crtc)
-{
-}
-
 /**
  * vmw_du_update_layout - Update the display unit with topology from resolution
  * plugin and generate DRM uevent
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index b8761f16dd78..a56e5d0ca3c6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -28,7 +28,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include "vmwgfx_kms.h"
 
@@ -235,9 +234,6 @@ static const struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
.atomic_duplicate_state = vmw_du_crtc_duplicate_state,
.atomic_destroy_state = vmw_du_crtc_destroy_state,
.set_config = drm_atomic_helper_set_config,
-   .get_vblank_counter = vmw_get_vblank_counter,
-   .enable_vblank = vmw_enable_vblank,
-   .disable_vblank = vmw_disable_vblank,
 };
 
 
@@ -507,10 +503,6 @@ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv)
dev_priv->ldu_priv->last_num_active = 0;
dev_priv->ldu_priv->fb = NULL;
 
-   ret = drm_vblank_init(dev, num_display_units);
-   if (ret != 0)
-   goto err_free;
-
vmw_kms_create_implicit_placement_property(dev_priv);
 
for (i = 0; i < num_display_units; ++i) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index ecd3c2fc978b..d7b71697d24e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include "vmwgfx_kms.h"
 
@@ -320,9 +319,6 @@ static const struct drm_crtc_funcs 
vmw_screen_object_crtc_funcs = {
.atomic_destroy_state = vmw_du_crtc_destroy_state,
.set_config = drm_atomic_helper_set_config,
.page_flip = drm_atomic_helper_page_flip,
-

[PATCH 15/16] drm/vmwgfx: Add a mksstat counter for cotable resizes

2022-10-17 Thread Zack Rusin

From: Zack Rusin 

There's been a lot of cotable resizes on startup which we can track
by adding a mks stat to measure both the invocation count and
time spent doing cotable resizes.

This is only used if the kernel is configured with CONFIG_DRM_VMWGFX_MKSSTATS.
The stats are collected on the host side inside the vmware-stats.log
file.

Signed-off-by: Zack Rusin 
Reviewed-by: Michael Banack 
Reviewed-by: Martin Krastev 
Reviewed-by: Maaz Mombasawala 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c | 13 +++--
 drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h |  2 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 14 +++---
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
index 79b30dc9d825..a4c30f950d7c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
@@ -33,6 +33,7 @@
 #include 
 
 #include "vmwgfx_drv.h"
+#include "vmwgfx_mksstat.h"
 #include "vmwgfx_resource_priv.h"
 #include "vmwgfx_so.h"
 
@@ -395,9 +396,12 @@ static int vmw_cotable_resize(struct vmw_resource *res, 
size_t new_size)
int ret;
size_t i;
 
+   MKS_STAT_TIME_DECL(MKSSTAT_KERN_COTABLE_RESIZE);
+   MKS_STAT_TIME_PUSH(MKSSTAT_KERN_COTABLE_RESIZE);
+
ret = vmw_cotable_readback(res);
if (ret)
-   return ret;
+   goto out_done;
 
cur_size_read_back = vcotbl->size_read_back;
vcotbl->size_read_back = old_size_read_back;
@@ -411,7 +415,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, 
size_t new_size)
true, true, vmw_bo_bo_free, );
if (ret) {
DRM_ERROR("Failed initializing new cotable MOB.\n");
-   return ret;
+   goto out_done;
}
 
bo = >base;
@@ -485,6 +489,8 @@ static int vmw_cotable_resize(struct vmw_resource *res, 
size_t new_size)
/* Release the pin acquired in vmw_bo_init */
ttm_bo_unpin(bo);
 
+   MKS_STAT_TIME_POP(MKSSTAT_KERN_COTABLE_RESIZE);
+
return 0;
 
 out_map_new:
@@ -494,6 +500,9 @@ static int vmw_cotable_resize(struct vmw_resource *res, 
size_t new_size)
ttm_bo_unreserve(bo);
vmw_bo_unreference();
 
+out_done:
+   MKS_STAT_TIME_POP(MKSSTAT_KERN_COTABLE_RESIZE);
+
return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h
index 0509f55f07b4..ede74c7fdbbf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h
@@ -29,6 +29,7 @@
 #define _VMWGFX_MKSSTAT_H_
 
 #include 
+#include 
 
 /* Reservation marker for mksstat pid's */
 #define MKSSTAT_PID_RESERVED -1
@@ -41,6 +42,7 @@
 
 typedef enum {
MKSSTAT_KERN_EXECBUF, /* vmw_execbuf_ioctl */
+   MKSSTAT_KERN_COTABLE_RESIZE,
 
MKSSTAT_KERN_COUNT /* Reserved entry; always last */
 } mksstat_kern_stats_t;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index a6cea35eaa01..fa713207877f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -85,7 +85,12 @@ struct rpc_channel {
u32 cookie_low;
 };
 
-
+/* Kernel mksGuestStats counter names and desciptions; same order as enum 
mksstat_kern_stats_t */
+static const char* const mksstat_kern_name_desc[MKSSTAT_KERN_COUNT][2] =
+{
+   { "vmw_execbuf_ioctl", "vmw_execbuf_ioctl" },
+   { "vmw_cotable_resize", "vmw_cotable_resize" },
+};
 
 /**
  * vmw_open_channel
@@ -695,12 +700,6 @@ static inline void hypervisor_ppn_remove(PPN64 pfn)
 /* Header to the text description of mksGuestStat instance descriptor */
 #define MKSSTAT_KERNEL_DESCRIPTION "vmwgfx"
 
-/* Kernel mksGuestStats counter names and desciptions; same order as enum 
mksstat_kern_stats_t */
-static const char* const mksstat_kern_name_desc[MKSSTAT_KERN_COUNT][2] =
-{
-   { "vmw_execbuf_ioctl", "vmw_execbuf_ioctl" },
-};
-
 /**
  * mksstat_init_record: Initializes an MKSGuestStatCounter-based record
  * for the respective mksGuestStat index.
@@ -786,6 +785,7 @@ static int mksstat_init_kern_id(struct page **ppage)
/* Set up all kernel-internal counters and corresponding structures */
pstrs_acc = pstrs;
pstrs_acc = mksstat_init_record_time(MKSSTAT_KERN_EXECBUF, pstat, 
pinfo, pstrs_acc);
+   pstrs_acc = mksstat_init_record_time(MKSSTAT_KERN_COTABLE_RESIZE, 
pstat, pinfo, pstrs_acc);
 
/* Add new counters above, in their order of appearance in 
mksstat_kern_stats_t */
 
-- 
2.34.1

[PATCH 08/16] drm/vmwgfx: Support cursor surfaces with mob cursor

2022-10-17 Thread Zack Rusin

From: Michael Banack 

Add support for cursor surfaces when using mob cursors.

Signed-off-by: Michael Banack 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 78 ++---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h |  1 +
 2 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 355dc807e898..966625943c09 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -79,7 +79,7 @@ static void vmw_send_define_cursor_cmd(struct vmw_private 
*dev_priv,
   other fallible KMS-atomic resources at prepare_fb */
cmd = VMW_CMD_RESERVE(dev_priv, cmd_size);
 
-   if (unlikely(cmd == NULL))
+   if (unlikely(!cmd))
return;
 
memset(cmd, 0, sizeof(*cmd));
@@ -104,7 +104,7 @@ static void vmw_cursor_update_image(struct vmw_private 
*dev_priv,
u32 *image, u32 width, u32 height,
u32 hotspotX, u32 hotspotY)
 {
-   if (vps->cursor.bo != NULL)
+   if (vps->cursor.bo)
vmw_cursor_write_mobid(dev_priv, vps);
else
vmw_send_define_cursor_cmd(dev_priv, image, width, height,
@@ -185,7 +185,7 @@ static bool vmw_du_cursor_plane_mob_has_changed(struct 
vmw_plane_state *old_vps,
u32 size;
 
// If either of them aren't using CursorMobs, assume changed.
-   if (old_vps->cursor.bo == NULL || new_vps->cursor.bo == NULL)
+   if (!old_vps->cursor.bo || !new_vps->cursor.bo)
return true;
 
// If either of them failed to map, assume changed.
@@ -210,7 +210,7 @@ static bool vmw_du_cursor_plane_mob_has_changed(struct 
vmw_plane_state *old_vps,
 
 static void vmw_du_destroy_cursor_mob(struct ttm_buffer_object **bo)
 {
-   if (*bo == NULL)
+   if (!(*bo))
return;
 
ttm_bo_unpin(*bo);
@@ -224,14 +224,14 @@ static void vmw_du_put_cursor_mob(struct vmw_cursor_plane 
*vcp,
 {
u32 i;
 
-   if (vps->cursor.bo == NULL)
+   if (!vps->cursor.bo)
return;
 
vmw_du_cursor_plane_unmap_cm(vps);
 
/* Look for a free slot to return this mob to the cache. */
for (i = 0; i < ARRAY_SIZE(vcp->cursor_mobs); i++) {
-   if (vcp->cursor_mobs[i] == NULL) {
+   if (!vcp->cursor_mobs[i]) {
vcp->cursor_mobs[i] = vps->cursor.bo;
vps->cursor.bo = NULL;
return;
@@ -273,7 +273,7 @@ static int vmw_du_get_cursor_mob(struct vmw_cursor_plane 
*vcp,
vps->base.crtc_h > cursor_max_dim)
return -EINVAL;
 
-   if (vps->cursor.bo != NULL) {
+   if (vps->cursor.bo) {
if (vps->cursor.bo->base.size >= size)
return 0;
vmw_du_put_cursor_mob(vcp, vps);
@@ -281,7 +281,7 @@ static int vmw_du_get_cursor_mob(struct vmw_cursor_plane 
*vcp,
 
/* Look for an unused mob in the cache. */
for (i = 0; i < ARRAY_SIZE(vcp->cursor_mobs); i++) {
-   if (vcp->cursor_mobs[i] != NULL &&
+   if (vcp->cursor_mobs[i] &&
vcp->cursor_mobs[i]->base.size >= size) {
vps->cursor.bo = vcp->cursor_mobs[i];
vcp->cursor_mobs[i] = NULL;
@@ -359,7 +359,7 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf,
 
cmd = container_of(header, struct vmw_dma_cmd, header);
 
-   /* No snooper installed */
+   /* No snooper installed, nothing to copy */
if (!srf->snooper.image)
return;
 
@@ -459,7 +459,8 @@ void vmw_kms_cursor_post_execbuf(struct vmw_private 
*dev_priv)
list_for_each_entry(crtc, >mode_config.crtc_list, head) {
du = vmw_crtc_to_du(crtc);
if (!du->cursor_surface ||
-   du->cursor_age == du->cursor_surface->snooper.age)
+   du->cursor_age == du->cursor_surface->snooper.age ||
+   !du->cursor_surface->snooper.image)
continue;
 
du->cursor_age = du->cursor_surface->snooper.age;
@@ -555,7 +556,7 @@ vmw_du_cursor_plane_map_cm(struct vmw_plane_state *vps)
u32 size = vmw_du_cursor_mob_size(vps->base.crtc_w, vps->base.crtc_h);
struct ttm_buffer_object *bo = vps->cursor.bo;
 
-   if (bo == NULL)
+   if (!bo)
return -EINVAL;
 
if (bo->base.size < size)
@@ -607,7 +608,7 @@ vmw_du_cursor_plane_unmap_cm(struct vmw_plane_state *vps)
if (!vps->cursor.mapped)
return 0;
 
-   if (bo == NULL)
+   if (!bo)
return 0;
 
ret = ttm_bo_reserve(bo, true, false, NULL);
@@ -639,7 +640,12 @@ vmw_du_cursor_plane_cleanup_fb(struct drm_plane *plane,
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
bool dummy;
 
-

[PATCH 09/16] drm/vmwgfx: Diff cursors when using cmds

2022-10-17 Thread Zack Rusin

From: Michael Banack 

Extend the cursor diffing support to support the command-path.

Signed-off-by: Michael Banack 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 119 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h |   2 +
 2 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 966625943c09..492b3e3f430b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -53,8 +53,10 @@ void vmw_du_cleanup(struct vmw_display_unit *du)
  */
 
 static int vmw_du_cursor_plane_unmap_cm(struct vmw_plane_state *vps);
-static void vmw_cursor_write_mobid(struct vmw_private *dev_priv,
-  struct vmw_plane_state *vps);
+static void vmw_cursor_update_mob(struct vmw_private *dev_priv,
+ struct vmw_plane_state *vps,
+ u32 *image, u32 width, u32 height,
+ u32 hotspotX, u32 hotspotY);
 
 struct vmw_svga_fifo_cmd_define_cursor {
u32 cmd;
@@ -105,7 +107,10 @@ static void vmw_cursor_update_image(struct vmw_private 
*dev_priv,
u32 hotspotX, u32 hotspotY)
 {
if (vps->cursor.bo)
-   vmw_cursor_write_mobid(dev_priv, vps);
+   vmw_cursor_update_mob(dev_priv, vps, image,
+ vps->base.crtc_w, vps->base.crtc_h,
+ hotspotX, hotspotY);
+
else
vmw_send_define_cursor_cmd(dev_priv, image, width, height,
   hotspotX, hotspotY);
@@ -151,61 +156,57 @@ static void vmw_cursor_update_mob(struct vmw_private 
*dev_priv,
alpha_header->height = height;
 
memcpy(header + 1, image, image_size);
-}
-
-
-/**
- * vmw_cursor_write_mobid - Update cursor via CursorMob mechanism
- *
- * Called from inside vmw_du_cursor_plane_atomic_update to actually
- * make the cursor-image live.
- *
- * @dev_priv: device to work with
- * @vps: DRM plane_state
- */
-static void vmw_cursor_write_mobid(struct vmw_private *dev_priv,
-  struct vmw_plane_state *vps)
-{
vmw_write(dev_priv, SVGA_REG_CURSOR_MOBID,
  vps->cursor.bo->resource->start);
 }
 
+
 static u32 vmw_du_cursor_mob_size(u32 w, u32 h)
 {
return w * h * sizeof(u32) + sizeof(SVGAGBCursorHeader);
 }
 
-
-static bool vmw_du_cursor_plane_mob_has_changed(struct vmw_plane_state 
*old_vps,
-   struct vmw_plane_state *new_vps)
+/**
+ * vmw_du_cursor_plane_acquire_image -- Acquire the image data
+ */
+static u32 *vmw_du_cursor_plane_acquire_image(struct vmw_plane_state *vps)
 {
-   void *old_mob;
-   void *new_mob;
bool dummy;
-   u32 size;
-
-   // If either of them aren't using CursorMobs, assume changed.
-   if (!old_vps->cursor.bo || !new_vps->cursor.bo)
-   return true;
+   if (vps->surf) {
+   if (vps->surf_mapped)
+   return vmw_bo_map_and_cache(vps->surf->res.backup);
+   return vps->surf->snooper.image;
+   } else if (vps->bo)
+   return ttm_kmap_obj_virtual(>bo->map, );
+   return NULL;
+}
 
-   // If either of them failed to map, assume changed.
-   if (!old_vps->cursor.mapped || !new_vps->cursor.mapped)
-   return true;
+static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps,
+   struct vmw_plane_state *new_vps)
+{
+   void *old_image;
+   void *new_image;
+   u32 size;
+   bool changed;
 
if (old_vps->base.crtc_w != new_vps->base.crtc_w ||
old_vps->base.crtc_h != new_vps->base.crtc_h)
return true;
 
-   size = vmw_du_cursor_mob_size(new_vps->base.crtc_w,
- new_vps->base.crtc_h);
+   if (old_vps->cursor.hotspot_x != new_vps->cursor.hotspot_x ||
+   old_vps->cursor.hotspot_y != new_vps->cursor.hotspot_y)
+   return true;
 
-   old_mob = ttm_kmap_obj_virtual(_vps->cursor.map, );
-   new_mob = ttm_kmap_obj_virtual(_vps->cursor.map, );
+   size = new_vps->base.crtc_w * new_vps->base.crtc_h * sizeof(u32);
 
-   if (memcmp(old_mob, new_mob, size) != 0)
-   return true;
+   old_image = vmw_du_cursor_plane_acquire_image(old_vps);
+   new_image = vmw_du_cursor_plane_acquire_image(new_vps);
 
-   return false;
+   changed = false;
+   if (old_image && new_image)
+   changed = memcmp(old_image, new_image, size) != 0;
+
+   return changed;
 }
 
 static void vmw_du_destroy_cursor_mob(struct ttm_buffer_object **bo)
@@ -733,6 +734,7 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
return -ENOMEM;
} else if

[PATCH 12/16] drm/vmwgfx: Do not allow invalid bpp's for dumb buffers

2022-10-17 Thread Zack Rusin

From: Zack Rusin 

Dumb buffers allow a very limited set of formats. Basically every pixel
size apart from 1, 2 and 4 bytes per pixel is expected to return an
error. Make vmwgfx follow those guidelines.

This fixes igt's dumb_buffer invalid_bpp test on vmwgfx.

Signed-off-by: Zack Rusin 
Reviewed-by: Martin Krastev 
Reviewed-by: Maaz Mombasawala 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 822251aaab0a..d218b15953e0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -807,9 +807,23 @@ int vmw_dumb_create(struct drm_file *file_priv,
 {
struct vmw_private *dev_priv = vmw_priv(dev);
struct vmw_buffer_object *vbo;
+   int cpp = DIV_ROUND_UP(args->bpp, 8);
int ret;
 
-   args->pitch = args->width * ((args->bpp + 7) / 8);
+   switch (cpp) {
+   case 1: /* DRM_FORMAT_C8 */
+   case 2: /* DRM_FORMAT_RGB565 */
+   case 4: /* DRM_FORMAT_XRGB */
+   break;
+   default:
+   /*
+* Dumb buffers don't allow anything else.
+* This is tested via IGT's dumb_buffers
+*/
+   return -EINVAL;
+   }
+
+   args->pitch = args->width * cpp;
args->size = ALIGN(args->pitch * args->height, PAGE_SIZE);
 
ret = vmw_gem_object_create_with_handle(dev_priv, file_priv,
-- 
2.34.1

[PATCH 07/16] drm/vmwgfx: Start diffing new mob cursors against old ones

2022-10-17 Thread Zack Rusin

From: Michael Banack 

Avoid making the SVGA device do extra work if the new cursor image
matches the old one.

Signed-off-by: Michael Banack 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 95 ++---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 12 ++--
 2 files changed, 81 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index d6e14accaaed..355dc807e898 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -52,11 +52,9 @@ void vmw_du_cleanup(struct vmw_display_unit *du)
  * Display Unit Cursor functions
  */
 
-static void vmw_cursor_update_mob(struct vmw_private *dev_priv,
- struct vmw_plane_state *vps,
- u32 *image, u32 width, u32 height,
- u32 hotspotX, u32 hotspotY);
 static int vmw_du_cursor_plane_unmap_cm(struct vmw_plane_state *vps);
+static void vmw_cursor_write_mobid(struct vmw_private *dev_priv,
+  struct vmw_plane_state *vps);
 
 struct vmw_svga_fifo_cmd_define_cursor {
u32 cmd;
@@ -107,9 +105,7 @@ static void vmw_cursor_update_image(struct vmw_private 
*dev_priv,
u32 hotspotX, u32 hotspotY)
 {
if (vps->cursor.bo != NULL)
-   vmw_cursor_update_mob(dev_priv, vps, image,
- width, height,
- hotspotX, hotspotY);
+   vmw_cursor_write_mobid(dev_priv, vps);
else
vmw_send_define_cursor_cmd(dev_priv, image, width, height,
   hotspotX, hotspotY);
@@ -155,6 +151,21 @@ static void vmw_cursor_update_mob(struct vmw_private 
*dev_priv,
alpha_header->height = height;
 
memcpy(header + 1, image, image_size);
+}
+
+
+/**
+ * vmw_cursor_write_mobid - Update cursor via CursorMob mechanism
+ *
+ * Called from inside vmw_du_cursor_plane_atomic_update to actually
+ * make the cursor-image live.
+ *
+ * @dev_priv: device to work with
+ * @vps: DRM plane_state
+ */
+static void vmw_cursor_write_mobid(struct vmw_private *dev_priv,
+  struct vmw_plane_state *vps)
+{
vmw_write(dev_priv, SVGA_REG_CURSOR_MOBID,
  vps->cursor.bo->resource->start);
 }
@@ -164,6 +175,39 @@ static u32 vmw_du_cursor_mob_size(u32 w, u32 h)
return w * h * sizeof(u32) + sizeof(SVGAGBCursorHeader);
 }
 
+
+static bool vmw_du_cursor_plane_mob_has_changed(struct vmw_plane_state 
*old_vps,
+   struct vmw_plane_state *new_vps)
+{
+   void *old_mob;
+   void *new_mob;
+   bool dummy;
+   u32 size;
+
+   // If either of them aren't using CursorMobs, assume changed.
+   if (old_vps->cursor.bo == NULL || new_vps->cursor.bo == NULL)
+   return true;
+
+   // If either of them failed to map, assume changed.
+   if (!old_vps->cursor.mapped || !new_vps->cursor.mapped)
+   return true;
+
+   if (old_vps->base.crtc_w != new_vps->base.crtc_w ||
+   old_vps->base.crtc_h != new_vps->base.crtc_h)
+   return true;
+
+   size = vmw_du_cursor_mob_size(new_vps->base.crtc_w,
+ new_vps->base.crtc_h);
+
+   old_mob = ttm_kmap_obj_virtual(_vps->cursor.map, );
+   new_mob = ttm_kmap_obj_virtual(_vps->cursor.map, );
+
+   if (memcmp(old_mob, new_mob, size) != 0)
+   return true;
+
+   return false;
+}
+
 static void vmw_du_destroy_cursor_mob(struct ttm_buffer_object **bo)
 {
if (*bo == NULL)
@@ -704,9 +748,10 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
struct vmw_private *dev_priv = vmw_priv(crtc->dev);
struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state);
+   struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state);
s32 hotspot_x, hotspot_y;
-   void *virtual;
bool dummy;
+   void *image;
 
hotspot_x = du->hotspot_x;
hotspot_y = du->hotspot_y;
@@ -726,23 +771,32 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 
if (vps->surf != NULL) {
du->cursor_age = du->cursor_surface->snooper.age;
+   image = vps->surf->snooper.image;
+   } else
+   image = ttm_kmap_obj_virtual(>bo->map, );
 
-   vmw_cursor_update_image(dev_priv, vps,
-   vps->surf->snooper.image,
+   if (vps->cursor.bo != NULL)
+   vmw_cursor_update_mob(dev_priv, vps, image,
+ new_state->crtc_w,
+ new_state->crtc_h,
+ hotspot_x, hotspot_y);
+
+   if

[PATCH 06/16] drm/vmwgfx: Clean up cursor mobs

2022-10-17 Thread Zack Rusin

From: Michael Banack 

Clean up the cursor mob path by moving ownership of the mobs into the
plane_state, and just leaving a cache of unused mobs in the plane
itself.

Signed-off-by: Michael Banack 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 425 
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h |  19 +-
 2 files changed, 253 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 214829c32ed8..d6e14accaaed 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -53,33 +53,27 @@ void vmw_du_cleanup(struct vmw_display_unit *du)
  */
 
 static void vmw_cursor_update_mob(struct vmw_private *dev_priv,
- struct ttm_buffer_object *bo,
- struct ttm_bo_kmap_obj *map,
+ struct vmw_plane_state *vps,
  u32 *image, u32 width, u32 height,
  u32 hotspotX, u32 hotspotY);
+static int vmw_du_cursor_plane_unmap_cm(struct vmw_plane_state *vps);
 
 struct vmw_svga_fifo_cmd_define_cursor {
u32 cmd;
SVGAFifoCmdDefineAlphaCursor cursor;
 };
 
-static void vmw_cursor_update_image(struct vmw_private *dev_priv,
-   struct ttm_buffer_object *cm_bo,
-   struct ttm_bo_kmap_obj *cm_map,
-   u32 *image, u32 width, u32 height,
-   u32 hotspotX, u32 hotspotY)
+/**
+ * vmw_send_define_cursor_cmd - queue a define cursor command
+ */
+static void vmw_send_define_cursor_cmd(struct vmw_private *dev_priv,
+  u32 *image, u32 width, u32 height,
+  u32 hotspotX, u32 hotspotY)
 {
struct vmw_svga_fifo_cmd_define_cursor *cmd;
const u32 image_size = width * height * sizeof(*image);
const u32 cmd_size = sizeof(*cmd) + image_size;
 
-   if (cm_bo != NULL) {
-   vmw_cursor_update_mob(dev_priv, cm_bo, cm_map, image,
- width, height,
- hotspotX, hotspotY);
-   return;
-   }
-
/* Try to reserve fifocmd space and swallow any failures;
   such reservations cannot be left unconsumed for long
   under the risk of clogging other fifocmd users, so
@@ -104,9 +98,30 @@ static void vmw_cursor_update_image(struct vmw_private 
*dev_priv,
vmw_cmd_commit_flush(dev_priv, cmd_size);
 }
 
+/**
+ * vmw_cursor_update_image - update the cursor image on the provided plane
+ */
+static void vmw_cursor_update_image(struct vmw_private *dev_priv,
+   struct vmw_plane_state *vps,
+   u32 *image, u32 width, u32 height,
+   u32 hotspotX, u32 hotspotY)
+{
+   if (vps->cursor.bo != NULL)
+   vmw_cursor_update_mob(dev_priv, vps, image,
+ width, height,
+ hotspotX, hotspotY);
+   else
+   vmw_send_define_cursor_cmd(dev_priv, image, width, height,
+  hotspotX, hotspotY);
+}
+
+
 /**
  * vmw_cursor_update_mob - Update cursor vis CursorMob mechanism
  *
+ * Called from inside vmw_du_cursor_plane_atomic_update to actually
+ * make the cursor-image live.
+ *
  * @dev_priv: device to work with
  * @bo: BO for the MOB
  * @map: kmap obj for the BO
@@ -117,8 +132,7 @@ static void vmw_cursor_update_image(struct vmw_private 
*dev_priv,
  * @hotspotY: cursor hotspot Y
  */
 static void vmw_cursor_update_mob(struct vmw_private *dev_priv,
- struct ttm_buffer_object *bo,
- struct ttm_bo_kmap_obj *map,
+ struct vmw_plane_state *vps,
  u32 *image, u32 width, u32 height,
  u32 hotspotX, u32 hotspotY)
 {
@@ -127,11 +141,11 @@ static void vmw_cursor_update_mob(struct vmw_private 
*dev_priv,
const u32 image_size = width * height * sizeof(*image);
bool dummy;
 
-   BUG_ON(!image);
-
-   header = (SVGAGBCursorHeader *)ttm_kmap_obj_virtual(map, );
+   header = ttm_kmap_obj_virtual(>cursor.map, );
alpha_header = >header.alphaHeader;
 
+   memset(header, 0, sizeof(*header));
+
header->type = SVGA_ALPHA_CURSOR;
header->sizeInBytes = image_size;
 
@@ -141,102 +155,116 @@ static void vmw_cursor_update_mob(struct vmw_private 
*dev_priv,
alpha_header->height = height;
 
memcpy(header + 1, image, image_size);
+   vmw_write(dev_priv, SVGA_REG_CURSOR_MOBID,
+ vps->cursor.bo->resource->start);
+}
 
-   vmw_write(dev_priv, SVGA_REG_CURSOR_MOBID,

[PATCH 04/16] drm/vmwgfx: Remove ttm object hashtable

2022-10-17 Thread Zack Rusin

From: Maaz Mombasawala 

The object_hash hashtable for ttm objects is not being used.
Remove it and perform refactoring in ttm_object init function.

Signed-off-by: Maaz Mombasawala 
Reviewed-by: Zack Rusin 
Reviewed-by: Martin Krastev 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/ttm_object.c | 24 ++--
 drivers/gpu/drm/vmwgfx/ttm_object.h |  6 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
 3 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c 
b/drivers/gpu/drm/vmwgfx/ttm_object.c
index 26a55fef1ab5..9546b121bc22 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**
  *
- * Copyright (c) 2009-2013 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2009-2022 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -44,13 +44,14 @@
 
 #define pr_fmt(fmt) "[TTM] " fmt
 
+#include "ttm_object.h"
+#include "vmwgfx_drv.h"
+
 #include 
 #include 
 #include 
 #include 
 #include 
-#include "ttm_object.h"
-#include "vmwgfx_drv.h"
 
 MODULE_IMPORT_NS(DMA_BUF);
 
@@ -81,9 +82,7 @@ struct ttm_object_file {
 /*
  * struct ttm_object_device
  *
- * @object_lock: lock that protects the object_hash hash table.
- *
- * @object_hash: hash table for fast lookup of object global names.
+ * @object_lock: lock that protects idr.
  *
  * @object_count: Per device object count.
  *
@@ -92,7 +91,6 @@ struct ttm_object_file {
 
 struct ttm_object_device {
spinlock_t object_lock;
-   struct vmwgfx_open_hash object_hash;
atomic_t object_count;
struct dma_buf_ops ops;
void (*dmabuf_release)(struct dma_buf *dma_buf);
@@ -449,20 +447,15 @@ struct ttm_object_file *ttm_object_file_init(struct 
ttm_object_device *tdev,
 }
 
 struct ttm_object_device *
-ttm_object_device_init(unsigned int hash_order,
-  const struct dma_buf_ops *ops)
+ttm_object_device_init(const struct dma_buf_ops *ops)
 {
struct ttm_object_device *tdev = kmalloc(sizeof(*tdev), GFP_KERNEL);
-   int ret;
 
if (unlikely(tdev == NULL))
return NULL;
 
spin_lock_init(>object_lock);
atomic_set(>object_count, 0);
-   ret = vmwgfx_ht_create(>object_hash, hash_order);
-   if (ret != 0)
-   goto out_no_object_hash;
 
/*
 * Our base is at VMWGFX_NUM_MOB + 1 because we want to create
@@ -477,10 +470,6 @@ ttm_object_device_init(unsigned int hash_order,
tdev->dmabuf_release = tdev->ops.release;
tdev->ops.release = ttm_prime_dmabuf_release;
return tdev;
-
-out_no_object_hash:
-   kfree(tdev);
-   return NULL;
 }
 
 void ttm_object_device_release(struct ttm_object_device **p_tdev)
@@ -491,7 +480,6 @@ void ttm_object_device_release(struct ttm_object_device 
**p_tdev)
 
WARN_ON_ONCE(!idr_is_empty(>idr));
idr_destroy(>idr);
-   vmwgfx_ht_remove(>object_hash);
 
kfree(tdev);
 }
diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h 
b/drivers/gpu/drm/vmwgfx/ttm_object.h
index 1a2fa0f83f5f..6870f951b677 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.h
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.h
@@ -1,6 +1,6 @@
 /**
  *
- * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2006-2022 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -262,7 +262,6 @@ extern void ttm_object_file_release(struct ttm_object_file 
**p_tfile);
 /**
  * ttm_object device init - initialize a struct ttm_object_device
  *
- * @hash_order: Order of hash table used to hash the base objects.
  * @ops: DMA buf ops for prime objects of this device.
  *
  * This function is typically called on device initialization to prepare
@@ -270,8 +269,7 @@ extern void ttm_object_file_release(struct ttm_object_file 
**p_tfile);
  */
 
 extern struct ttm_object_device *
-ttm_object_device_init(unsigned int hash_order,
-  const struct dma_buf_ops *ops);
+ttm_object_device_init(const struct dma_buf_ops *ops);
 
 /**
  * ttm_object_device_release - release data held by a ttm_object_device
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 45028e25d490..13b90273eb77 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -994,7 +994,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, 
u32 pci_id)
goto out_err0;
}
 
-   dev_priv->tdev = ttm_object_device_init(12, _prime_dmabuf_ops);
+   dev_priv->tdev = ttm_object_device_init(_prime_dmabuf_ops);
 
if (unlikely(dev_priv->tdev ==

[PATCH 03/16] drm/vmwgfx: Refactor resource manager's hashtable to use linux/hashtable implementation.

2022-10-17 Thread Zack Rusin

From: Maaz Mombasawala 

Vmwgfx's hashtab implementation needs to be replaced with linux/hashtable
to reduce maintenance burden.
Refactor cmdbuf resource manager to use linux/hashtable.h implementation
as part of this effort.

Signed-off-by: Maaz Mombasawala 
Reviewed-by: Zack Rusin 
Reviewed-by: Martin Krastev 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 62 +-
 1 file changed, 26 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
index 82ef58ccdd42..142aef686fcd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR MIT
 /**
  *
- * Copyright 2014-2015 VMware, Inc., Palo Alto, CA., USA
+ * Copyright 2014-2022 VMware, Inc., Palo Alto, CA., USA
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -28,6 +28,8 @@
 #include "vmwgfx_drv.h"
 #include "vmwgfx_resource_priv.h"
 
+#include 
+
 #define VMW_CMDBUF_RES_MAN_HT_ORDER 12
 
 /**
@@ -59,7 +61,7 @@ struct vmw_cmdbuf_res {
  * @resources and @list are protected by the cmdbuf mutex for now.
  */
 struct vmw_cmdbuf_res_manager {
-   struct vmwgfx_open_hash resources;
+   DECLARE_HASHTABLE(resources, VMW_CMDBUF_RES_MAN_HT_ORDER);
struct list_head list;
struct vmw_private *dev_priv;
 };
@@ -82,14 +84,13 @@ vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man,
  u32 user_key)
 {
struct vmwgfx_hash_item *hash;
-   int ret;
unsigned long key = user_key | (res_type << 24);
 
-   ret = vmwgfx_ht_find_item(>resources, key, );
-   if (unlikely(ret != 0))
-   return ERR_PTR(ret);
-
-   return drm_hash_entry(hash, struct vmw_cmdbuf_res, hash)->res;
+   hash_for_each_possible_rcu(man->resources, hash, head, key) {
+   if (hash->key == key)
+   return drm_hash_entry(hash, struct vmw_cmdbuf_res, 
hash)->res;
+   }
+   return ERR_PTR(-EINVAL);
 }
 
 /**
@@ -105,7 +106,7 @@ static void vmw_cmdbuf_res_free(struct 
vmw_cmdbuf_res_manager *man,
struct vmw_cmdbuf_res *entry)
 {
list_del(>head);
-   WARN_ON(vmwgfx_ht_remove_item(>resources, >hash));
+   hash_del_rcu(>hash.head);
vmw_resource_unreference(>res);
kfree(entry);
 }
@@ -159,7 +160,6 @@ void vmw_cmdbuf_res_commit(struct list_head *list)
 void vmw_cmdbuf_res_revert(struct list_head *list)
 {
struct vmw_cmdbuf_res *entry, *next;
-   int ret;
 
list_for_each_entry_safe(entry, next, list, head) {
switch (entry->state) {
@@ -167,8 +167,8 @@ void vmw_cmdbuf_res_revert(struct list_head *list)
vmw_cmdbuf_res_free(entry->man, entry);
break;
case VMW_CMDBUF_RES_DEL:
-   ret = vmwgfx_ht_insert_item(>man->resources, 
>hash);
-   BUG_ON(ret);
+   hash_add_rcu(entry->man->resources, >hash.head,
+   entry->hash.key);
list_move_tail(>head, >man->list);
entry->state = VMW_CMDBUF_RES_COMMITTED;
break;
@@ -199,26 +199,20 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man,
   struct list_head *list)
 {
struct vmw_cmdbuf_res *cres;
-   int ret;
 
cres = kzalloc(sizeof(*cres), GFP_KERNEL);
if (unlikely(!cres))
return -ENOMEM;
 
cres->hash.key = user_key | (res_type << 24);
-   ret = vmwgfx_ht_insert_item(>resources, >hash);
-   if (unlikely(ret != 0)) {
-   kfree(cres);
-   goto out_invalid_key;
-   }
+   hash_add_rcu(man->resources, >hash.head, cres->hash.key);
 
cres->state = VMW_CMDBUF_RES_ADD;
cres->res = vmw_resource_reference(res);
cres->man = man;
list_add_tail(>head, list);
 
-out_invalid_key:
-   return ret;
+   return 0;
 }
 
 /**
@@ -243,24 +237,26 @@ int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager 
*man,
  struct list_head *list,
  struct vmw_resource **res_p)
 {
-   struct vmw_cmdbuf_res *entry;
+   struct vmw_cmdbuf_res *entry = NULL;
struct vmwgfx_hash_item *hash;
-   int ret;
+   unsigned long key = user_key | (res_type << 24);
 
-   ret = vmwgfx_ht_find_item(>resources, user_key | (res_type << 24),
-  );
-   if (likely(ret != 0))
+   hash_for_each_possible_rcu(man->resources, hash, head, key) {
+   if (hash->key == key) {
+   entry =

[PATCH 02/16] drm/vmwgfx: Fix frame-size warning in vmw_mksstat_add_ioctl

2022-10-17 Thread Zack Rusin

From: Martin Krastev 

Function vmw_mksstat_add_ioctl allocates three big arrays on the stack.
That triggers a frame-size [-Wframe-larger-than=] warning. Refactor
that function to use kmalloc_array instead.

Signed-off-by: Martin Krastev 
Reviewed-by: Zack Rusin 
Reviewed-by: Maaz Mombasawala 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 39 -
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 089046fa21be..a6cea35eaa01 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -1023,10 +1023,11 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void 
*data,
long nr_pinned_stat;
long nr_pinned_info;
long nr_pinned_strs;
-   struct page *pages_stat[ARRAY_SIZE(pdesc->statPPNs)];
-   struct page *pages_info[ARRAY_SIZE(pdesc->infoPPNs)];
-   struct page *pages_strs[ARRAY_SIZE(pdesc->strsPPNs)];
+   struct page **pages_stat = NULL;
+   struct page **pages_info = NULL;
+   struct page **pages_strs = NULL;
size_t i, slot;
+   int ret_err = -ENOMEM;
 
arg->id = -1;
 
@@ -1054,13 +1055,23 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void 
*data,
 
BUG_ON(dev_priv->mksstat_user_pages[slot]);
 
+   /* Allocate statically-sized temp arrays for pages -- too big to keep 
in frame */
+   pages_stat = (struct page **)kmalloc_array(
+   ARRAY_SIZE(pdesc->statPPNs) +
+   ARRAY_SIZE(pdesc->infoPPNs) +
+   ARRAY_SIZE(pdesc->strsPPNs), sizeof(*pages_stat), GFP_KERNEL);
+
+   if (!pages_stat)
+   goto err_nomem;
+
+   pages_info = pages_stat + ARRAY_SIZE(pdesc->statPPNs);
+   pages_strs = pages_info + ARRAY_SIZE(pdesc->infoPPNs);
+
/* Allocate a page for the instance descriptor */
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 
-   if (!page) {
-   atomic_set(_priv->mksstat_user_pids[slot], 0);
-   return -ENOMEM;
-   }
+   if (!page)
+   goto err_nomem;
 
/* Set up the instance descriptor */
pdesc = page_address(page);
@@ -1075,9 +1086,8 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void 
*data,
ARRAY_SIZE(pdesc->description) - 1);
 
if (desc_len < 0) {
-   atomic_set(_priv->mksstat_user_pids[slot], 0);
-   __free_page(page);
-   return -EFAULT;
+   ret_err = -EFAULT;
+   goto err_nomem;
}
 
reset_ppn_array(pdesc->statPPNs, ARRAY_SIZE(pdesc->statPPNs));
@@ -1118,6 +1128,7 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void 
*data,
 
DRM_DEV_INFO(dev->dev, "pid=%d arg.description='%.*s' id=%zu\n", 
current->pid, (int)desc_len, pdesc->description, slot);
 
+   kfree(pages_stat);
return 0;
 
 err_pin_strs:
@@ -1132,9 +1143,13 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void 
*data,
if (nr_pinned_stat > 0)
unpin_user_pages(pages_stat, nr_pinned_stat);
 
+err_nomem:
atomic_set(_priv->mksstat_user_pids[slot], 0);
-   __free_page(page);
-   return -ENOMEM;
+   if (page)
+   __free_page(page);
+   kfree(pages_stat);
+
+   return ret_err;
 }
 
 /**
-- 
2.34.1

[PATCH 16/16] drm/vmwgfx: Optimize initial sizes of cotables

2022-10-17 Thread Zack Rusin

From: Zack Rusin 

It's important to get the initial size of cotables right because
otherwise every app needs to start with a synchronous cotable resize.

This has a measurable impact on system wide performance but is not
relevant for long running single full screen apps for which the cotable
resizes will happen early in the lifecycle and will continue running
just fine.

To eliminate the initial cotable resizes match the initial sizes to what
the userspace expects. The actual result of the patch is simply setting
the initial size of two of the cotables to a size that will align them
to two pages instead of one.

For a piglit run, before:
name   |  total |  per frame | per sec
vmw_cotable_resize |   1405 |   0.12 |1.58
vmw_execbuf_ioctl  | 290805 |  25.43 |  326.05

After:
name   |  total |  per frame | per sec
vmw_cotable_resize |  4 |   0.00 |0.00
vmw_execbuf_ioctl  | 281673 |  25.10 |  274.68

Signed-off-by: Zack Rusin 
Reviewed-by: Michael Banack 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
index a4c30f950d7c..0422b6b89cc1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
@@ -73,12 +73,24 @@ struct vmw_cotable_info {
bool);
 };
 
+
+/*
+ * Getting the initial size right is difficult because it all depends
+ * on what the userspace is doing. The sizes will be aligned up to
+ * a PAGE_SIZE so we just want to make sure that for majority of apps
+ * the initial number of entries doesn't require an immediate resize.
+ * For all cotables except SVGACOTableDXElementLayoutEntry and
+ * SVGACOTableDXBlendStateEntry the initial number of entries fits
+ * within the PAGE_SIZE. For SVGACOTableDXElementLayoutEntry and
+ * SVGACOTableDXBlendStateEntry we want to reserve two pages,
+ * because that's what all apps will require initially.
+ */
 static const struct vmw_cotable_info co_info[] = {
{1, sizeof(SVGACOTableDXRTViewEntry), _view_cotable_list_destroy},
{1, sizeof(SVGACOTableDXDSViewEntry), _view_cotable_list_destroy},
{1, sizeof(SVGACOTableDXSRViewEntry), _view_cotable_list_destroy},
-   {1, sizeof(SVGACOTableDXElementLayoutEntry), NULL},
-   {1, sizeof(SVGACOTableDXBlendStateEntry), NULL},
+   {PAGE_SIZE/sizeof(SVGACOTableDXElementLayoutEntry) + 1, 
sizeof(SVGACOTableDXElementLayoutEntry), NULL},
+   {PAGE_SIZE/sizeof(SVGACOTableDXBlendStateEntry) + 1, 
sizeof(SVGACOTableDXBlendStateEntry), NULL},
{1, sizeof(SVGACOTableDXDepthStencilEntry), NULL},
{1, sizeof(SVGACOTableDXRasterizerStateEntry), NULL},
{1, sizeof(SVGACOTableDXSamplerEntry), NULL},
-- 
2.34.1

[PATCH 05/16] drm/vmwgfx: Refactor resource validation hashtable to use linux/hashtable implementation.

2022-10-17 Thread Zack Rusin

From: Maaz Mombasawala 

Vmwgfx's hashtab implementation needs to be replaced with linux/hashtable
to reduce maintenance burden.
As part of this effort, refactor the res_ht hashtable used for resource
validation during execbuf execution to use linux/hashtable implementation.
This also refactors vmw_validation_context to use vmw_sw_context as the
container for the hashtable, whereas before it used a vmwgfx_open_hash
directly. This makes vmw_validation_context less generic, but there is
no functional change since res_ht is the only instance where validation
context used a hashtable in vmwgfx driver.

Signed-off-by: Maaz Mombasawala 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 24 --
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h|  5 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c| 14 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 55 +++---
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 26 +++---
 5 files changed, 58 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 13b90273eb77..8d77e79bd904 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -830,6 +830,22 @@ static void vmw_write_driver_id(struct vmw_private *dev)
}
 }
 
+static void vmw_sw_context_init(struct vmw_private *dev_priv)
+{
+   struct vmw_sw_context *sw_context = _priv->ctx;
+
+   hash_init(sw_context->res_ht);
+}
+
+static void vmw_sw_context_fini(struct vmw_private *dev_priv)
+{
+   struct vmw_sw_context *sw_context = _priv->ctx;
+
+   vfree(sw_context->cmd_bounce);
+   if (sw_context->staged_bindings)
+   vmw_binding_state_free(sw_context->staged_bindings);
+}
+
 static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
 {
int ret;
@@ -839,6 +855,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, 
u32 pci_id)
 
dev_priv->drm.dev_private = dev_priv;
 
+   vmw_sw_context_init(dev_priv);
+
mutex_init(_priv->cmdbuf_mutex);
mutex_init(_priv->binding_mutex);
spin_lock_init(_priv->resource_lock);
@@ -1168,9 +1186,7 @@ static void vmw_driver_unload(struct drm_device *dev)
 
unregister_pm_notifier(_priv->pm_nb);
 
-   if (dev_priv->ctx.res_ht_initialized)
-   vmwgfx_ht_remove(_priv->ctx.res_ht);
-   vfree(dev_priv->ctx.cmd_bounce);
+   vmw_sw_context_fini(dev_priv);
if (dev_priv->enable_fb) {
vmw_fb_off(dev_priv);
vmw_fb_close(dev_priv);
@@ -1198,8 +1214,6 @@ static void vmw_driver_unload(struct drm_device *dev)
vmw_irq_uninstall(_priv->drm);
 
ttm_object_device_release(_priv->tdev);
-   if (dev_priv->ctx.staged_bindings)
-   vmw_binding_state_free(dev_priv->ctx.staged_bindings);
 
for (i = vmw_res_context; i < vmw_res_max; ++i)
idr_destroy(_priv->res_idr[i]);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 09e2d738aa87..d87aeedb78d0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -30,6 +30,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -93,6 +94,7 @@
 #define VMW_RES_STREAM ttm_driver_type2
 #define VMW_RES_FENCE ttm_driver_type3
 #define VMW_RES_SHADER ttm_driver_type4
+#define VMW_RES_HT_ORDER 12
 
 #define MKSSTAT_CAPACITY_LOG2 5U
 #define MKSSTAT_CAPACITY (1U << MKSSTAT_CAPACITY_LOG2)
@@ -425,8 +427,7 @@ struct vmw_ctx_validation_info;
  * @ctx: The validation context
  */
 struct vmw_sw_context{
-   struct vmwgfx_open_hash res_ht;
-   bool res_ht_initialized;
+   DECLARE_HASHTABLE(res_ht, VMW_RES_HT_ORDER);
bool kernel;
struct vmw_fpriv *fp;
struct drm_file *filp;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index f085dbd4736d..c943ab801ca7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR MIT
 /**
  *
- * Copyright 2009 - 2015 VMware, Inc., Palo Alto, CA., USA
+ * Copyright 2009 - 2022 VMware, Inc., Palo Alto, CA., USA
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -25,6 +25,7 @@
  *
  **/
 #include 
+#include 
 
 #include "vmwgfx_drv.h"
 #include "vmwgfx_reg.h"
@@ -34,7 +35,6 @@
 #include "vmwgfx_binding.h"
 #include "vmwgfx_mksstat.h"
 
-#define VMW_RES_HT_ORDER 12
 
 /*
  * Helper macro to get dx_ctx_node if available otherwise print an error
@@ -4101,7 +4101,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
int ret;
int32_t out_fence_fd = -1;
struct

[PATCH 01/16] drm/vmwgfx: Write the driver id registers

2022-10-17 Thread Zack Rusin

From: Zack Rusin 

Driver id registers are a new mechanism in the svga device to hint to the
device which driver is running. This should not change device behavior
in any way, but might be convenient to work-around specific bugs
in guest drivers.

Signed-off-by: Zack Rusin 
Reviewed-by: Martin Krastev 
Reviewed-by: Maaz Mombasawala 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 43 +++--
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index d7bd5eb1d3ac..45028e25d490 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -25,10 +25,13 @@
  *
  **/
 
-#include 
-#include 
-#include 
-#include 
+
+#include "vmwgfx_drv.h"
+
+#include "vmwgfx_devcaps.h"
+#include "vmwgfx_mksstat.h"
+#include "vmwgfx_binding.h"
+#include "ttm_object.h"
 
 #include 
 #include 
@@ -41,11 +44,11 @@
 #include 
 #include 
 
-#include "ttm_object.h"
-#include "vmwgfx_binding.h"
-#include "vmwgfx_devcaps.h"
-#include "vmwgfx_drv.h"
-#include "vmwgfx_mksstat.h"
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
 
@@ -806,6 +809,27 @@ static int vmw_detect_version(struct vmw_private *dev)
return 0;
 }
 
+static void vmw_write_driver_id(struct vmw_private *dev)
+{
+   if ((dev->capabilities2 & SVGA_CAP2_DX2) != 0) {
+   vmw_write(dev,  SVGA_REG_GUEST_DRIVER_ID,
+ SVGA_REG_GUEST_DRIVER_ID_LINUX);
+
+   vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION1,
+ LINUX_VERSION_MAJOR << 24 |
+ LINUX_VERSION_PATCHLEVEL << 16 |
+ LINUX_VERSION_SUBLEVEL);
+   vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION2,
+ VMWGFX_DRIVER_MAJOR << 24 |
+ VMWGFX_DRIVER_MINOR << 16 |
+ VMWGFX_DRIVER_PATCHLEVEL);
+   vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION3, 0);
+
+   vmw_write(dev, SVGA_REG_GUEST_DRIVER_ID,
+ SVGA_REG_GUEST_DRIVER_ID_SUBMIT);
+   }
+}
+
 static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
 {
int ret;
@@ -1091,6 +1115,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, 
u32 pci_id)
vmw_host_printf("vmwgfx: Module Version: %d.%d.%d (kernel: %s)",
VMWGFX_DRIVER_MAJOR, VMWGFX_DRIVER_MINOR,
VMWGFX_DRIVER_PATCHLEVEL, UTS_RELEASE);
+   vmw_write_driver_id(dev_priv);
 
if (dev_priv->enable_fb) {
vmw_fifo_resource_inc(dev_priv);
-- 
2.34.1

[PATCH 00/16] vmwgfx: fb, cursors and hashtable refactor

2022-10-17 Thread Zack Rusin

From: Zack Rusin 

This is a bit larger series than usual but these are all connected in
various ways. The most important changes, around which everything is
centered, include:
- finally getting rid of vmwgfx_hashtab and porting the driver to 
  linux/hashtable
- cleaning up the cursor mob handling, which fixes a bunch of cursor
  issues on kde configs
- removing vmwgfx fb code and porting it to drm fb helpers
- removing vmwgfx faked vblank handling

The rest is largely support code to make the transition easier (with some
igt fixes to get more of it running for regression testing). The result
is removal of over 1000loc with no loss in functionality.

Maaz Mombasawala (5):
  drm/vmwgfx: Refactor resource manager's hashtable to use
linux/hashtable implementation.
  drm/vmwgfx: Remove ttm object hashtable
  drm/vmwgfx: Refactor resource validation hashtable to use
linux/hashtable implementation.
  drm/vmwgfx: Refactor ttm reference object hashtable to use
linux/hashtable.
  drm/vmwgfx : Remove vmwgfx_hashtab

Martin Krastev (1):
  drm/vmwgfx: Fix frame-size warning in vmw_mksstat_add_ioctl

Michael Banack (4):
  drm/vmwgfx: Clean up cursor mobs
  drm/vmwgfx: Start diffing new mob cursors against old ones
  drm/vmwgfx: Support cursor surfaces with mob cursor
  drm/vmwgfx: Diff cursors when using cmds

Zack Rusin (6):
  drm/vmwgfx: Write the driver id registers
  drm/vmwgfx: Do not allow invalid bpp's for dumb buffers
  drm/vmwgfx: Port the framebuffer code to drm fb helpers
  drm/vmwgfx: Remove explicit and broken vblank handling
  drm/vmwgfx: Add a mksstat counter for cotable resizes
  drm/vmwgfx: Optimize initial sizes of cotables

 Documentation/gpu/todo.rst |  11 -
 drivers/gpu/drm/vmwgfx/Kconfig |   7 -
 drivers/gpu/drm/vmwgfx/Makefile|   4 +-
 drivers/gpu/drm/vmwgfx/ttm_object.c| 123 ++-
 drivers/gpu/drm/vmwgfx/ttm_object.h|  20 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c |  16 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c |  62 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c|  29 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 129 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h|  49 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c|  14 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 831 -
 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c| 199 -
 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h|  83 --
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c| 622 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h|  31 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c|   8 -
 drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h|   2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c|  53 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c   |  27 -
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c   |  26 -
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c |  55 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.h |  26 +-
 23 files changed, 632 insertions(+), 1795 deletions(-)
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h

-- 
2.34.1

[PATCH] fbdev: da8xx-fb: Fix error handling in .remove()

2022-10-17 Thread Uwe Kleine-König

Even in the presence of problems (here: regulator_disable() might fail),
it's important to unregister all resources acquired during .probe() and
disable the device (i.e. DMA activity) because even if .remove() returns
an error code, the device is removed and the .remove() callback is never
called again later to catch up.

This is a preparation for making platform remove callbacks return void.

Signed-off-by: Uwe Kleine-König 
---
 drivers/video/fbdev/da8xx-fb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c
index ae76a2111c77..11922b009ed7 100644
--- a/drivers/video/fbdev/da8xx-fb.c
+++ b/drivers/video/fbdev/da8xx-fb.c
@@ -1076,7 +1076,8 @@ static int fb_remove(struct platform_device *dev)
if (par->lcd_supply) {
ret = regulator_disable(par->lcd_supply);
if (ret)
-   return ret;
+   dev_warn(>dev, "Failed to disable regulator 
(%pe)\n",
+ERR_PTR(ret));
}
 
lcd_disable_raster(DA8XX_FRAME_WAIT);

base-commit: 4fe89d07dcc2804c8b562f6c7896a45643d34b2f
-- 
2.37.2

Re: [PATCH] drm/amdkfd: use vma_lookup() instead of find_vma()

2022-10-17 Thread Felix Kuehling




On 2022-10-06 22:48, Deming Wang wrote:

Using vma_lookup() verifies the start address is contained in the found
vma.  This makes the code easier to read.


Thank you for the patches. This and your other patch look good to me. 
However, you missed one use of find_vma in svm_range_is_valid. Is that 
an oversight or is there a reason why we need to use find_vma there?


If you're going to respin it, you may also squash the two patches into one.

Thanks,
  Felix




Signed-off-by: Deming Wang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 64fdf63093a0..cabcc2ca3c23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1586,8 +1586,8 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
unsigned long npages;
bool readonly;
  
-		vma = find_vma(mm, addr);

-   if (!vma || addr < vma->vm_start) {
+   vma = vma_lookup(mm, addr);
+   if (!vma) {
r = -EFAULT;
goto unreserve_out;
}
@@ -2542,8 +2542,8 @@ svm_range_get_range_boundaries(struct kfd_process *p, 
int64_t addr,
struct interval_tree_node *node;
unsigned long start_limit, end_limit;
  
-	vma = find_vma(p->mm, addr << PAGE_SHIFT);

-   if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+   vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
+   if (!vma) {
pr_debug("VMA does not exist in address [0x%llx]\n", addr);
return -EFAULT;
}
@@ -2871,8 +2871,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
/* __do_munmap removed VMA, return success as we are handling stale
 * retry fault.
 */
-   vma = find_vma(mm, addr << PAGE_SHIFT);
-   if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+   vma = vma_lookup(mm, addr << PAGE_SHIFT);
+   if (!vma) {
pr_debug("address 0x%llx VMA is removed\n", addr);
r = 0;
goto out_unlock_range;

[PATCH] drm/bridge: ps8640: Add back the 50 ms mystery delay after HPD

2022-10-17 Thread Douglas Anderson

Back in commit 826cff3f7ebb ("drm/bridge: parade-ps8640: Enable
runtime power management") we removed a mysterious 50 ms delay because
"Parade's support [couldn't] explain what the delay [was] for".

While I'm always a fan of removing mysterious delays, I suspect that
we need this mysterious delay to avoid some problems.

Specifically, what I found recently is that on sc7180-trogdor-homestar
sometimes the AUX backlight wasn't initializing properly. Some
debugging showed that the drm_dp_dpcd_read() function that the AUX
backlight driver was calling was returning bogus data about 1% of the
time when I booted up. This confused
drm_panel_dp_aux_backlight(). From continued debugging:
- If I retried the read then the read worked just fine.
- If I added a loop to perform the same read that
  drm_panel_dp_aux_backlight() was doing 30 times at bootup I could
  see that some percentage of the time the first read would give bogus
  data but all 29 additional reads would always be fine.
- If I added a large delay _after_ powering on the panel but before
  powering on PS8640 I could still reproduce the problem.
- If I added a delay after PS8640 powered on then I couldn't reproduce
  the problem.
- I couldn't reproduce the problem on a board with the same panel but
  the ti-sn65dsi86 bridge chip.

To me, the above indicated that there was a problem with PS8640 and
not the panel.

I don't really have any insight into what's going on in the MCU, but
my best guess is that when the MCU itself sees the HPD go high that it
does some AUX transfers itself and this is confusing things.

Let's go back and add back in the mysterious 50 ms delay. We only want
to do this the first time we see HPD go high after booting the MCU,
not every time we double-check HPD.

With this, the backlight initializes reliably on homestar.

Fixes: 826cff3f7ebb ("drm/bridge: parade-ps8640: Enable runtime power 
management")
Signed-off-by: Douglas Anderson 
---

 drivers/gpu/drm/bridge/parade-ps8640.c | 25 +++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c 
b/drivers/gpu/drm/bridge/parade-ps8640.c
index 5be6562c2a19..6a614e54b383 100644
--- a/drivers/gpu/drm/bridge/parade-ps8640.c
+++ b/drivers/gpu/drm/bridge/parade-ps8640.c
@@ -105,6 +105,7 @@ struct ps8640 {
struct gpio_desc *gpio_powerdown;
struct device_link *link;
bool pre_enabled;
+   bool need_post_hpd_delay;
 };
 
 static const struct regmap_config ps8640_regmap_config[] = {
@@ -173,14 +174,31 @@ static int _ps8640_wait_hpd_asserted(struct ps8640 
*ps_bridge, unsigned long wai
 {
struct regmap *map = ps_bridge->regmap[PAGE2_TOP_CNTL];
int status;
+   int ret;
 
/*
 * Apparently something about the firmware in the chip signals that
 * HPD goes high by reporting GPIO9 as high (even though HPD isn't
 * actually connected to GPIO9).
 */
-   return regmap_read_poll_timeout(map, PAGE2_GPIO_H, status,
-   status & PS_GPIO9, wait_us / 10, 
wait_us);
+   ret = regmap_read_poll_timeout(map, PAGE2_GPIO_H, status,
+  status & PS_GPIO9, wait_us / 10, 
wait_us);
+
+   /*
+* The first time we see HPD go high after a reset we delay an extra
+* 50 ms. The best guess is that the MCU is doing "stuff" during this
+* time (maybe talking to the panel) and we don't want to interrupt it.
+*
+* No locking is done around "need_post_hpd_delay". If we're here we
+* know we're holding a PM Runtime reference and the only other place
+* that touches this is PM Runtime resume.
+*/
+   if (!ret && ps_bridge->need_post_hpd_delay) {
+   ps_bridge->need_post_hpd_delay = false;
+   msleep(50);
+   }
+
+   return ret;
 }
 
 static int ps8640_wait_hpd_asserted(struct drm_dp_aux *aux, unsigned long 
wait_us)
@@ -388,6 +406,9 @@ static int __maybe_unused ps8640_resume(struct device *dev)
msleep(50);
gpiod_set_value(ps_bridge->gpio_reset, 0);
 
+   /* We just reset things, so we need a delay after the first HPD */
+   ps_bridge->need_post_hpd_delay = true;
+
/*
 * Mystery 200 ms delay for the "MCU to be ready". It's unclear if
 * this is truly necessary since the MCU will already signal that
-- 
2.38.0.413.g74048e4d9e-goog

Re: [BUG] [PATCH] drm/rockchip: use generic fbdev setup

2022-10-17 Thread Johan Jonker




On 10/17/22 21:00, John Keeping wrote:
> On Mon, Oct 17, 2022 at 08:30:23PM +0200, Johan Jonker wrote:
>>
>>
>> On 10/17/22 13:29, Heiko Stuebner wrote:
>>> Am Montag, 17. Oktober 2022, 12:05:16 CEST schrieb John Keeping:
 Hi Johan,

 On Mon, Oct 17, 2022 at 10:11:32AM +0200, Johan Jonker wrote:
> Your patch contribution causes a kernel panic on MK808 with Rockchip 
> rk3066a SoC.
> Would you like to contribute to fix this issue?
> The assumption that drm_fbdev_generic_setup() does what 
> rockchip_drm_fbdev_init did is not true!
> A revert makes it work again.

>>
 It looks like there are 3 different ways to end up with -ENOMEM here,
 can you track down whether you're hitting one of the cases in
 rockchip_gem_prime_vmap() or if it's the iosys_map_is_null case in
 drm_gem_vmap()?
>>
>> It looks like it comes from rockchip_gem_prime_vmap() second return (2).
>>
>>
>>  if (rk_obj->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
>>
>> 
>>
>>  printk("FBDEV rockchip_gem_prime_vmap 2");
>>
>> 
>>  return -ENOMEM;
>>  }
> 
> Ah-ha, Heiko was right that this is because the no-iommu path is broken
> as a result of switching to the generic fbdev code.
> 
> This patch should fix it, but I wonder if Thomas has any ideas about a
> better way to handle this since it feels a bit hacky to special-case the
> fb_helper inside the GEM code:

The penguin is back on screen. Thanks!

> 
> -- >8 --
> diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c 
> b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
> index 614e97aaac80..da8a69953706 100644
> --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
> +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
> @@ -364,9 +364,12 @@ rockchip_gem_create_with_handle(struct drm_file 
> *file_priv,
>  {
>   struct rockchip_gem_object *rk_obj;
>   struct drm_gem_object *obj;
> + bool is_framebuffer;
>   int ret;
>  
> - rk_obj = rockchip_gem_create_object(drm, size, false);
> + is_framebuffer = drm->fb_helper && file_priv == 
> drm->fb_helper->client.file;
> +
> + rk_obj = rockchip_gem_create_object(drm, size, is_framebuffer);
>   if (IS_ERR(rk_obj))
>   return ERR_CAST(rk_obj);
> -- 8< --

Re: [PATCH 2/2] drm/connector: send hotplug uevent on connector cleanup

2022-10-17 Thread Lyude Paul

LGTM! Thank you for the help with this:

Reviewed-by: Lyude Paul 

On Mon, 2022-10-17 at 15:32 +, Simon Ser wrote:
> A typical DP-MST unplug removes a KMS connector. However care must
> be taken to properly synchronize with user-space. The expected
> sequence of events is the following:
> 
> 1. The kernel notices that the DP-MST port is gone.
> 2. The kernel marks the connector as disconnected, then sends a
>uevent to make user-space re-scan the connector list.
> 3. User-space notices the connector goes from connected to disconnected,
>disables it.
> 4. Kernel handles the IOCTL disabling the connector. On success,
>the very last reference to the struct drm_connector is dropped and
>drm_connector_cleanup() is called.
> 5. The connector is removed from the list, and a uevent is sent to tell
>user-space that the connector disappeared.
> 
> The very last step was missing. As a result, user-space thought the
> connector still existed and could try to disable it again. Since the
> kernel no longer knows about the connector, that would end up with
> EINVAL and confused user-space.
> 
> Fix this by sending a hotplug uevent from drm_connector_cleanup().
> 
> Signed-off-by: Simon Ser 
> Cc: sta...@vger.kernel.org
> Cc: Daniel Vetter 
> Cc: Lyude Paul 
> Cc: Jonas Ådahl 
> ---
>  drivers/gpu/drm/drm_connector.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
> index e3142c8142b3..90dad87e9ad0 100644
> --- a/drivers/gpu/drm/drm_connector.c
> +++ b/drivers/gpu/drm/drm_connector.c
> @@ -582,6 +582,9 @@ void drm_connector_cleanup(struct drm_connector 
> *connector)
>   mutex_destroy(>mutex);
>  
>   memset(connector, 0, sizeof(*connector));
> +
> + if (dev->registered)
> + drm_sysfs_hotplug_event(dev);
>  }
>  EXPORT_SYMBOL(drm_connector_cleanup);
>  

-- 
Cheers,
 Lyude Paul (she/her)
 Software Engineer at Red Hat

Re: [BUG] [PATCH] drm/rockchip: use generic fbdev setup

2022-10-17 Thread John Keeping

On Mon, Oct 17, 2022 at 08:30:23PM +0200, Johan Jonker wrote:
> 
> 
> On 10/17/22 13:29, Heiko Stuebner wrote:
> > Am Montag, 17. Oktober 2022, 12:05:16 CEST schrieb John Keeping:
> >> Hi Johan,
> >>
> >> On Mon, Oct 17, 2022 at 10:11:32AM +0200, Johan Jonker wrote:
> >>> Your patch contribution causes a kernel panic on MK808 with Rockchip 
> >>> rk3066a SoC.
> >>> Would you like to contribute to fix this issue?
> >>> The assumption that drm_fbdev_generic_setup() does what 
> >>> rockchip_drm_fbdev_init did is not true!
> >>> A revert makes it work again.
> >>
> 
> >> It looks like there are 3 different ways to end up with -ENOMEM here,
> >> can you track down whether you're hitting one of the cases in
> >> rockchip_gem_prime_vmap() or if it's the iosys_map_is_null case in
> >> drm_gem_vmap()?
> 
> It looks like it comes from rockchip_gem_prime_vmap() second return (2).
> 
> 
>   if (rk_obj->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
> 
> 
> 
>   printk("FBDEV rockchip_gem_prime_vmap 2");
> 
> 
>   return -ENOMEM;
>   }

Ah-ha, Heiko was right that this is because the no-iommu path is broken
as a result of switching to the generic fbdev code.

This patch should fix it, but I wonder if Thomas has any ideas about a
better way to handle this since it feels a bit hacky to special-case the
fb_helper inside the GEM code:

-- >8 --
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c 
b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index 614e97aaac80..da8a69953706 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -364,9 +364,12 @@ rockchip_gem_create_with_handle(struct drm_file *file_priv,
 {
struct rockchip_gem_object *rk_obj;
struct drm_gem_object *obj;
+   bool is_framebuffer;
int ret;
 
-   rk_obj = rockchip_gem_create_object(drm, size, false);
+   is_framebuffer = drm->fb_helper && file_priv == 
drm->fb_helper->client.file;
+
+   rk_obj = rockchip_gem_create_object(drm, size, is_framebuffer);
if (IS_ERR(rk_obj))
return ERR_CAST(rk_obj);
-- 8< --

Re: [PATCH v3] drm/amd/display: add an ASSERT() to irq service functions

2022-10-17 Thread Harry Wentland

On 2022-10-17 12:13, Hamza Mahfooz wrote:
> Currently, if we encounter unimplemented functions, it is difficult to
> tell what caused them just by looking at dmesg and that is compounded by
> the fact that it is often hard to reproduce said issues, for instance we
> have had reports of this condition being triggered when removing a
> secondary display that is setup in mirror mode and is connected using
> usb-c. So, to have access to more detailed debugging information, add an
> ASSERT() to dal_irq_service_ack() and dal_irq_service_set() that only
> triggers when we encounter an unimplemented function.
> 
> Signed-off-by: Hamza Mahfooz 

Reviewed-by: Harry Wentland 

Harry

> ---
> v2: detail specific instance that I'm interested in and use ASSERT()
> instead of WARN().
> 
> v3: move ASSERT()s inside the new if blocks.
> ---
>  .../gpu/drm/amd/display/dc/irq/irq_service.c| 17 +++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c 
> b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> index 7bad39bba86b..d100edaedbbb 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> @@ -112,8 +112,15 @@ bool dal_irq_service_set(
>  
>   dal_irq_service_ack(irq_service, source);
>  
> - if (info->funcs && info->funcs->set)
> + if (info->funcs && info->funcs->set) {
> + if (info->funcs->set == dal_irq_service_dummy_set) {
> + DC_LOG_WARNING("%s: src: %d, st: %d\n", __func__,
> +source, enable);
> + ASSERT(0);
> + }
> +
>   return info->funcs->set(irq_service, info, enable);
> + }
>  
>   dal_irq_service_set_generic(irq_service, info, enable);
>  
> @@ -146,8 +153,14 @@ bool dal_irq_service_ack(
>   return false;
>   }
>  
> - if (info->funcs && info->funcs->ack)
> + if (info->funcs && info->funcs->ack) {
> + if (info->funcs->ack == dal_irq_service_dummy_ack) {
> + DC_LOG_WARNING("%s: src: %d\n", __func__, source);
> + ASSERT(0);
> + }
> +
>   return info->funcs->ack(irq_service, info);
> + }
>  
>   dal_irq_service_ack_generic(irq_service, info);
>

Re: [PATCH] drm: bridge: adv7511: use dev_err_probe in probe function

2022-10-17 Thread Laurent Pinchart

Hi Ahmad,

Thank you for the patch.

On Mon, Oct 17, 2022 at 08:28:09PM +0200, Ahmad Fatoum wrote:
> adv7511 probe may need to be attempted multiple times before no
> -EPROBE_DEFER is returned. Currently, every such probe results in
> an error message:
> 
> [4.534229] adv7511 1-003d: failed to find dsi host
> [4.580288] adv7511 1-003d: failed to find dsi host
> 
> This is misleading, as there is no error and probe deferral is normal
> behavior. Fix this by using dev_err_probe that will suppress
> -EPROBE_DEFER errors. While at it, we touch all dev_err in the probe
> path. This makes the code more concise and includes the error code
> everywhere to aid user in debugging.
> 
> Fixes: 1e4d58cd7f88 ("drm/bridge: adv7533: Create a MIPI DSI device")
> Signed-off-by: Ahmad Fatoum 
> ---
>  drivers/gpu/drm/bridge/adv7511/adv7511_drv.c |  6 ++
>  drivers/gpu/drm/bridge/adv7511/adv7533.c | 18 ++
>  2 files changed, 8 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c 
> b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
> index 1c37779b434a..4148b6d6f151 100644
> --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
> +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
> @@ -1229,10 +1229,8 @@ static int adv7511_probe(struct i2c_client *i2c, const 
> struct i2c_device_id *id)
>   return ret;
>  
>   ret = adv7511_init_regulators(adv7511);
> - if (ret) {
> - dev_err(dev, "failed to init regulators\n");
> - return ret;
> - }
> + if (ret)
> + return dev_err_probe(dev, ret, "failed to init regulators\n");
>  
>   /*
>* The power down GPIO is optional. If present, toggle it from active to
> diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c 
> b/drivers/gpu/drm/bridge/adv7511/adv7533.c
> index ef6270806d1d..b32b796c25fb 100644
> --- a/drivers/gpu/drm/bridge/adv7511/adv7533.c
> +++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c
> @@ -148,16 +148,12 @@ int adv7533_attach_dsi(struct adv7511 *adv)
>};
>  
>   host = of_find_mipi_dsi_host_by_node(adv->host_node);
> - if (!host) {
> - dev_err(dev, "failed to find dsi host\n");
> - return -EPROBE_DEFER;
> - }
> + if (!host)
> + return dev_err_probe(dev, -EPROBE_DEFER, "failed to find dsi 
> host\n");

I'd wrap this line:

return dev_err_probe(dev, -EPROBE_DEFER,
 "failed to find dsi host\n");

>  
>   dsi = devm_mipi_dsi_device_register_full(dev, host, );
> - if (IS_ERR(dsi)) {
> - dev_err(dev, "failed to create dsi device\n");
> - return PTR_ERR(dsi);
> - }
> + if (IS_ERR(dsi))
> + return dev_err_probe(dev, PTR_ERR(dsi), "failed to create dsi 
> device\n");

Same here.

Reviewed-by: Laurent Pinchart 

>  
>   adv->dsi = dsi;
>  
> @@ -167,10 +163,8 @@ int adv7533_attach_dsi(struct adv7511 *adv)
> MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE;
>  
>   ret = devm_mipi_dsi_attach(dev, dsi);
> - if (ret < 0) {
> - dev_err(dev, "failed to attach dsi to host\n");
> - return ret;
> - }
> + if (ret < 0)
> + return dev_err_probe(dev, ret, "failed to attach dsi to 
> host\n");
>  
>   return 0;
>  }

-- 
Regards,

Laurent Pinchart

Re: [BUG] [PATCH] drm/rockchip: use generic fbdev setup

2022-10-17 Thread Johan Jonker




On 10/17/22 13:29, Heiko Stuebner wrote:
> Am Montag, 17. Oktober 2022, 12:05:16 CEST schrieb John Keeping:
>> Hi Johan,
>>
>> On Mon, Oct 17, 2022 at 10:11:32AM +0200, Johan Jonker wrote:
>>> Your patch contribution causes a kernel panic on MK808 with Rockchip 
>>> rk3066a SoC.
>>> Would you like to contribute to fix this issue?
>>> The assumption that drm_fbdev_generic_setup() does what 
>>> rockchip_drm_fbdev_init did is not true!
>>> A revert makes it work again.
>>

>> It looks like there are 3 different ways to end up with -ENOMEM here,
>> can you track down whether you're hitting one of the cases in
>> rockchip_gem_prime_vmap() or if it's the iosys_map_is_null case in
>> drm_gem_vmap()?

It looks like it comes from rockchip_gem_prime_vmap() second return (2).





/*
 * Debug-instrumented copy of rockchip_gem_prime_vmap() used to find out which
 * path returns -ENOMEM. The "FBDEV ... 1"/"FBDEV ... 2" printk() calls are
 * temporary markers identifying the first and second failure return.
 *
 * Stores a kernel virtual address for the GEM object in @map.
 * Returns 0 on success, or -ENOMEM when vmap() fails or when the object has
 * DMA_ATTR_NO_KERNEL_MAPPING set in its dma_attrs.
 */
int rockchip_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
{
struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);

/* Object has a page array: build a write-combined mapping with vmap(). */
if (rk_obj->pages) {
void *vaddr = vmap(rk_obj->pages, rk_obj->num_pages, VM_MAP,
  pgprot_writecombine(PAGE_KERNEL));
if (!vaddr) {
/* Failure marker 1: vmap() returned NULL. */
printk("FBDEV rockchip_gem_prime_vmap 1");
return -ENOMEM;
}
iosys_map_set_vaddr(map, vaddr);
return 0;
}

/* No page array: fail if the object is flagged as having no kernel mapping. */
if (rk_obj->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) {



/* Failure marker 2: this is the return seen in the log of this report. */
printk("FBDEV rockchip_gem_prime_vmap 2");


return -ENOMEM;
}
/* Otherwise hand out the address already stored in rk_obj->kvaddr. */
iosys_map_set_vaddr(map, rk_obj->kvaddr);

return 0;
}



[7.678392] [drm:drm_client_modeset_probe] connector 39 enabled? yes
[7.678435] [drm:drm_client_modeset_probe] Not using firmware configuration
[7.678465] [drm:drm_client_modeset_probe] looking for cmdline mode on 
connector 39
[7.678494] [drm:drm_client_modeset_probe] looking for preferred mode on 
connector 39 0
[7.678521] [drm:drm_client_modeset_probe] found mode 1920x1080
[7.678545] [drm:drm_client_modeset_probe] picking CRTCs for 1920x1080 config
[7.678585] [drm:drm_client_modeset_probe] desired mode 1920x1080 set on 
crtc 35 (0,0)
[7.801673] Console: switching to colour frame buffer device 240x67


[7.811047] FBDEV rockchip_gem_prime_vmap 2


[7.811071] [ cut here ]
[7.811084] WARNING: CPU: 0 PID: 35 at drivers/gpu/drm/drm_fb_helper.c:471 
drm_fb_helper_damage_work+0x138/0x3b4
[7.811198] rockchip-drm display-subsystem: Damage blitter failed: ret=-12
[7.811219] Modules linked in:
[7.811244] CPU: 0 PID: 35 Comm: kworker/0:4 Not tainted 
6.0.0-next-20221013+ #46
[7.811281] Hardware name: Rockchip (Device Tree)
[7.811300] Workqueue: events drm_fb_helper_damage_work
[7.811352] Backtrace: 
[7.811370]  dump_backtrace from show_stack+0x20/0x24
[7.811431]  r7:01d7 r6:0009 r5:c0b2bc60 r4:6013
[7.811444]  show_stack from dump_stack_lvl+0x48/0x54
[7.811512]  dump_stack_lvl from dump_stack+0x18/0x1c
[7.811580]  r5:c0586064 r4:c0b6374c
[7.811590]  dump_stack from __warn+0xdc/0x154
[7.811677]  __warn from warn_slowpath_fmt+0xa4/0xd8
[7.811740]  r7:01d7 r6:c0b6374c r5:c1004ec8 r4:c0b639e8
[7.811750]  warn_slowpath_fmt from drm_fb_helper_damage_work+0x138/0x3b4
[7.811821]  r9:ef7cf105 r8:c15dfc00 r7:fff4 r6:c200b490 r5:c1004ec8 
r4:c200b494
[7.811833]  drm_fb_helper_damage_work from process_one_work+0x230/0x518
[7.811912]  r10:c110d140 r9:ef7cf105 r8: r7:ef7cf100 r6:ef7cbf00 
r5:c200e300
[7.811927]  r4:c200b494
[7.811936]  process_one_work from worker_thread+0x54/0x554
[7.811991]  r10:ef7cbf00 r9:0008 r8:c1003d40 r7:ef7cbf1c r6:c200e318 
r5:ef7cbf00
[7.812006]  r4:c200e300
[7.812015]  worker_thread from kthread+0xe8/0x104
[7.812100]  r10:f0929e84 r9:c200da00 r8:c169aa80 r7:c200e300 r6:c01419e4 
r5:
[7.812114]  r4:c200d780
[7.812124]  kthread from ret_from_fork+0x14/0x2c
[7.812178] Exception stack(0xf092dfb0 to 0xf092dff8)
[7.812205] dfa0:   
 
[7.812232] dfc0:       
 
[7.812255] dfe0:     0013 
[7.812282]  r10: r9: r8: r7: r6: 
r5:c01491a8
[7.812299]  r4:c200d780 r3:0001
[7.812309] ---[ end trace  ]---
[7.812336] FBDEV rockchip_gem_prime_vmap 2
[7.889795] FBDEV rockchip_gem_prime_vmap 2
[7.890418] FBDEV rockchip_gem_prime_vmap 2
[7.899447] FBDEV rockchip_gem_prime_vmap 2
[7.905252] FBDEV rockchip_gem_prime_vmap 2

>>
>> I guess the memory usage increases slightly using the generic code and
>> RK3066 has less memory available.
> 
> also rk3066 and rk3188 do not have an iommu, so rely
> on cma allocations.
> 
> 
> Heiko
> 
>

[PATCH] drm: bridge: adv7511: use dev_err_probe in probe function

2022-10-17 Thread Ahmad Fatoum

adv7511 probe may need to be attempted multiple times before it stops
returning -EPROBE_DEFER. Currently, every such probe results in
an error message:

[4.534229] adv7511 1-003d: failed to find dsi host
[4.580288] adv7511 1-003d: failed to find dsi host

This is misleading, as there is no error and probe deferral is normal
behavior. Fix this by using dev_err_probe that will suppress
-EPROBE_DEFER errors. While at it, we touch all dev_err in the probe
path. This makes the code more concise and includes the error code
everywhere to aid users in debugging.

Fixes: 1e4d58cd7f88 ("drm/bridge: adv7533: Create a MIPI DSI device")
Signed-off-by: Ahmad Fatoum 
---
 drivers/gpu/drm/bridge/adv7511/adv7511_drv.c |  6 ++
 drivers/gpu/drm/bridge/adv7511/adv7533.c | 18 ++
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c 
b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index 1c37779b434a..4148b6d6f151 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -1229,10 +1229,8 @@ static int adv7511_probe(struct i2c_client *i2c, const 
struct i2c_device_id *id)
return ret;
 
ret = adv7511_init_regulators(adv7511);
-   if (ret) {
-   dev_err(dev, "failed to init regulators\n");
-   return ret;
-   }
+   if (ret)
+   return dev_err_probe(dev, ret, "failed to init regulators\n");
 
/*
 * The power down GPIO is optional. If present, toggle it from active to
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c 
b/drivers/gpu/drm/bridge/adv7511/adv7533.c
index ef6270806d1d..b32b796c25fb 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7533.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c
@@ -148,16 +148,12 @@ int adv7533_attach_dsi(struct adv7511 *adv)
 };
 
host = of_find_mipi_dsi_host_by_node(adv->host_node);
-   if (!host) {
-   dev_err(dev, "failed to find dsi host\n");
-   return -EPROBE_DEFER;
-   }
+   if (!host)
+   return dev_err_probe(dev, -EPROBE_DEFER, "failed to find dsi 
host\n");
 
dsi = devm_mipi_dsi_device_register_full(dev, host, );
-   if (IS_ERR(dsi)) {
-   dev_err(dev, "failed to create dsi device\n");
-   return PTR_ERR(dsi);
-   }
+   if (IS_ERR(dsi))
+   return dev_err_probe(dev, PTR_ERR(dsi), "failed to create dsi 
device\n");
 
adv->dsi = dsi;
 
@@ -167,10 +163,8 @@ int adv7533_attach_dsi(struct adv7511 *adv)
  MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE;
 
ret = devm_mipi_dsi_attach(dev, dsi);
-   if (ret < 0) {
-   dev_err(dev, "failed to attach dsi to host\n");
-   return ret;
-   }
+   if (ret < 0)
+   return dev_err_probe(dev, ret, "failed to attach dsi to 
host\n");
 
return 0;
 }
-- 
2.30.2

Re: [PATCH v2 4/7] drm/simpledrm: Add support for system memory framebuffers

2022-10-17 Thread Rob Herring

On Mon, Oct 17, 2022 at 9:54 AM Thierry Reding  wrote:
>
> On Mon, Oct 10, 2022 at 10:12:34AM +0200, Thomas Zimmermann wrote:
> > Hi
> >
> > Am 07.10.22 um 14:49 schrieb Thierry Reding:
> > > From: Thierry Reding 
> > >
> > > Simple framebuffers can be set up in system memory, which cannot be
> > > requested and/or I/O remapped using the I/O resource helpers. Add a
> > > separate code path that obtains system memory framebuffers from the
> > > reserved memory region referenced in the memory-region property.
> > >
> > > v2: make screen base a struct iosys_map to avoid sparse warnings

[...]

> > > +static int simple_framebuffer_init(struct reserved_mem *rmem)
> > > +{
> > > +   pr_info("framebuffer memory at %pa, size %lu bytes\n", >base,
> > > +   (unsigned long)rmem->size);
> > > +
> > > +   rmem->ops = _framebuffer_ops;
> > > +
> > > +   return 0;
> > > +}
> > > +RESERVEDMEM_OF_DECLARE(simple_framebuffer, "framebuffer", 
> > > simple_framebuffer_init);
> >
> > What's the purpose of this code at all?  I looked through the kernel, but
> > there aren't many other examples of it.
>
> This is a fairly standard construct to deal with early memory
> reservations. What happens is roughly this: during early kernel boot,
> the reserved-memory core code will iterate over all children of the top-
> level reserved-memory node and see if they have a compatible string that
> matches one of the entries in the table created by these
> RESERVEDMEM_OF_DECLARE entries. It will then call the init function for
> a matched entry and register a struct reserved_mem for these. The init
> function in this case just dumps an informational message to the boot
> log to provide some information about the framebuffer region that was
> reserved (which can be used for example for troubleshooting purposes)
> and sets the device init/release operations (which will be called when a
> device is associated with the reserved memory region, i.e. when the
> of_reserved_mem_device_init_by_idx() function is called).
>
> The reason why there aren't many examples of this is because these are
> special memory regions that (at least upstream) kernels seldom support.
> Perhaps the most common use-cases are the shared DMA pools (such as
> CMA).

Also, not all regions need to be handled 'early' before slab allocator
or drivers are probed. Do you need early handling here? I can't see
why other than if fbcon is up early.

Rob

Re: [PATCH v2 1/2] drm/i915: Add intel_ prefix to struct ip_version

2022-10-17 Thread Lucas De Marchi


On Tue, Oct 11, 2022 at 08:38:50AM -0700, Radhakrishna Sripada wrote:

Rename struct ip_version to intel_ip_version to comply with the
naming conventions for structures.

Suggested-by: Jani Nikula 
Signed-off-by: Radhakrishna Sripada 



Reviewed-by: Lucas De Marchi 

Lucas De Marchi

[PATCH v7 18/21] dma-buf: Move dma_buf_mmap() to dynamic locking specification

2022-10-17 Thread Dmitry Osipenko

Move dma_buf_mmap() function to the dynamic locking specification by
taking the reservation lock. None of today's drivers takes the
reservation lock within the mmap() callback, hence it's safe to enforce
the locking.

Acked-by: Sumit Semwal 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f54c649f922a..f149b384f4dd 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1390,6 +1390,8 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_end_cpu_access, DMA_BUF);
 int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 unsigned long pgoff)
 {
+   int ret;
+
if (WARN_ON(!dmabuf || !vma))
return -EINVAL;
 
@@ -1410,7 +1412,11 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct 
vm_area_struct *vma,
vma_set_file(vma, dmabuf->file);
vma->vm_pgoff = pgoff;
 
-   return dmabuf->ops->mmap(dmabuf, vma);
+   dma_resv_lock(dmabuf->resv, NULL);
+   ret = dmabuf->ops->mmap(dmabuf, vma);
+   dma_resv_unlock(dmabuf->resv);
+
+   return ret;
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, DMA_BUF);
 
-- 
2.37.3

Re: [PATCH 5/7] drm/vc4: dpi: Support BGR666 formats

2022-10-17 Thread Dave Stevenson

Hi Laurent

Thanks for the review.

On Sat, 15 Oct 2022 at 18:31, Laurent Pinchart
 wrote:
>
> On Sat, Oct 15, 2022 at 08:26:48PM +0300, Laurent Pinchart wrote:
> > Hi Maxime and Joerg,
> >
> > Thank you for the patch.
> >
> > On Thu, Oct 13, 2022 at 11:56:49AM +0200, Maxime Ripard wrote:
> > > From: Joerg Quinten 
> > >
> > > The VC4 DPI output can support multiple BGR666 variants, but they were
> > > never added to the driver. Let's add support for those formats.
> > >
> > > Signed-off-by: Joerg Quinten 
> > > Signed-off-by: Maxime Ripard 
> >
> > Reviewed-by: Laurent Pinchart 
> >
> > > ---
> > >  drivers/gpu/drm/vc4/vc4_dpi.c | 6 ++
> > >  1 file changed, 6 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
> > > index 7da3dd1db50e..ecbe4cd87036 100644
> > > --- a/drivers/gpu/drm/vc4/vc4_dpi.c
> > > +++ b/drivers/gpu/drm/vc4/vc4_dpi.c
> > > @@ -170,10 +170,16 @@ static void vc4_dpi_encoder_enable(struct 
> > > drm_encoder *encoder)
> > > dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR,
> > >DPI_ORDER);
> > > break;
> > > +   case MEDIA_BUS_FMT_BGR666_1X24_CPADHI:
> > > +   dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR, 
> > > DPI_ORDER);
> > > +   fallthrough;
>
> Upon closer inspection of the code, I think you also need
>
> -   dpi_c &= ~DPI_FORMAT_MASK;
> +   dpi_c &= ~(DPI_ORDER_MASK | DPI_FORMAT_MASK);
>
> a few lines above.

Hmm, curious as there appears to be a difference between our vendor
tree and mainline in the contents of a patch with almost the same
commit text.

https://github.com/torvalds/linux/commit/7a70b0b97889a89f397913c971cadfc6db9b310a
vs 
https://github.com/raspberrypi/linux/commit/9d9dc236a9ee9021363b8601b6188e7be86d1971

Our tree leaves dpi_c effectively clear, and selects the default at
the end. Mainline sets the default first.


Even so, dpi_c gets initialised with
dpi_c = DPI_ENABLE;
dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT);
dpi_c &= ~DPI_FORMAT_MASK;
so at this point in the switch, the DPI_ORDER bits haven't changed
from the default of 0.

  Dave

> > > case MEDIA_BUS_FMT_RGB666_1X24_CPADHI:
> > > dpi_c |= 
> > > VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_2,
> > >DPI_FORMAT);
> > > break;
> > > +   case MEDIA_BUS_FMT_BGR666_1X18:
> > > +   dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR, 
> > > DPI_ORDER);
> > > +   fallthrough;
> > > case MEDIA_BUS_FMT_RGB666_1X18:
> > > dpi_c |= 
> > > VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_1,
> > >DPI_FORMAT);
> > >
>
> --
> Regards,
>
> Laurent Pinchart

[PATCH v7 15/21] dma-buf: Move dma_buf_vmap() to dynamic locking specification

2022-10-17 Thread Dmitry Osipenko

Move dma_buf_vmap/vunmap() functions to the dynamic locking
specification by asserting that the reservation lock is held.

Acked-by: Sumit Semwal 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 3e4060dadb74..2c4381bb9478 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1450,6 +1450,8 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map 
*map)
if (WARN_ON(!dmabuf))
return -EINVAL;
 
+   dma_resv_assert_held(dmabuf->resv);
+
if (!dmabuf->ops->vmap)
return -EINVAL;
 
@@ -1515,6 +1517,8 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct 
iosys_map *map)
if (WARN_ON(!dmabuf))
return;
 
+   dma_resv_assert_held(dmabuf->resv);
+
BUG_ON(iosys_map_is_null(>vmap_ptr));
BUG_ON(dmabuf->vmapping_counter == 0);
BUG_ON(!iosys_map_is_equal(>vmap_ptr, map));
-- 
2.37.3

[PATCH v7 21/21] dma-buf: Remove obsoleted internal lock

2022-10-17 Thread Dmitry Osipenko

The internal dma-buf lock isn't needed anymore because the updated
locking specification claims that dma-buf reservation must be locked
by importers, and thus, the internal data is already protected by the
reservation lock. Remove the obsoleted internal lock.

Acked-by: Sumit Semwal 
Acked-by: Christian König 
Reviewed-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 14 --
 include/linux/dma-buf.h   |  9 -
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f1d968e5bac4..7663c4e784b6 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -657,7 +657,6 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)
 
dmabuf->file = file;
 
-   mutex_init(>lock);
INIT_LIST_HEAD(>attachments);
 
mutex_lock(_list.lock);
@@ -1503,7 +1502,7 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, DMA_BUF);
 int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map)
 {
struct iosys_map ptr;
-   int ret = 0;
+   int ret;
 
iosys_map_clear(map);
 
@@ -1515,28 +1514,25 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct 
iosys_map *map)
if (!dmabuf->ops->vmap)
return -EINVAL;
 
-   mutex_lock(>lock);
if (dmabuf->vmapping_counter) {
dmabuf->vmapping_counter++;
BUG_ON(iosys_map_is_null(>vmap_ptr));
*map = dmabuf->vmap_ptr;
-   goto out_unlock;
+   return 0;
}
 
BUG_ON(iosys_map_is_set(>vmap_ptr));
 
ret = dmabuf->ops->vmap(dmabuf, );
if (WARN_ON_ONCE(ret))
-   goto out_unlock;
+   return ret;
 
dmabuf->vmap_ptr = ptr;
dmabuf->vmapping_counter = 1;
 
*map = dmabuf->vmap_ptr;
 
-out_unlock:
-   mutex_unlock(>lock);
-   return ret;
+   return 0;
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_vmap, DMA_BUF);
 
@@ -1583,13 +1579,11 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct 
iosys_map *map)
BUG_ON(dmabuf->vmapping_counter == 0);
BUG_ON(!iosys_map_is_equal(>vmap_ptr, map));
 
-   mutex_lock(>lock);
if (--dmabuf->vmapping_counter == 0) {
if (dmabuf->ops->vunmap)
dmabuf->ops->vunmap(dmabuf, map);
iosys_map_clear(>vmap_ptr);
}
-   mutex_unlock(>lock);
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap, DMA_BUF);
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index f11b5bbc2f37..6fa8d4e29719 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -326,15 +326,6 @@ struct dma_buf {
/** @ops: dma_buf_ops associated with this buffer object. */
const struct dma_buf_ops *ops;
 
-   /**
-* @lock:
-*
-* Used internally to serialize list manipulation, attach/detach and
-* vmap/unmap. Note that in many cases this is superseeded by
-* dma_resv_lock() on @resv.
-*/
-   struct mutex lock;
-
/**
 * @vmapping_counter:
 *
-- 
2.37.3

[PATCH v7 17/21] dma-buf: Move dma_buf_map_attachment() to dynamic locking specification

2022-10-17 Thread Dmitry Osipenko

Move dma-buf attachment mapping functions to the dynamic locking
specification by asserting that the reservation lock is held.

Acked-by: Sumit Semwal 
Reviewed-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index d685a5adb122..f54c649f922a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1038,8 +1038,7 @@ struct sg_table *dma_buf_map_attachment(struct 
dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
 
-   if (dma_buf_attachment_is_dynamic(attach))
-   dma_resv_assert_held(attach->dmabuf->resv);
+   dma_resv_assert_held(attach->dmabuf->resv);
 
if (attach->sgt) {
/*
@@ -1054,7 +1053,6 @@ struct sg_table *dma_buf_map_attachment(struct 
dma_buf_attachment *attach,
}
 
if (dma_buf_is_dynamic(attach->dmabuf)) {
-   dma_resv_assert_held(attach->dmabuf->resv);
if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
r = attach->dmabuf->ops->pin(attach);
if (r)
@@ -1143,15 +1141,11 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment 
*attach,
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
 
-   if (dma_buf_attachment_is_dynamic(attach))
-   dma_resv_assert_held(attach->dmabuf->resv);
+   dma_resv_assert_held(attach->dmabuf->resv);
 
if (attach->sgt == sg_table)
return;
 
-   if (dma_buf_is_dynamic(attach->dmabuf))
-   dma_resv_assert_held(attach->dmabuf->resv);
-
__unmap_dma_buf(attach, sg_table, direction);
 
if (dma_buf_is_dynamic(attach->dmabuf) &&
-- 
2.37.3

[PATCH v7 16/21] dma-buf: Move dma_buf_attach() to dynamic locking specification

2022-10-17 Thread Dmitry Osipenko

Move dma-buf attachment API functions to the dynamic locking specification
by taking the reservation lock around the mapping operations. The strict
locking convention prevents deadlock situations for dma-buf importers and
exporters.

Acked-by: Sumit Semwal 
Reviewed-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 20 
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 2c4381bb9478..d685a5adb122 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -859,8 +859,8 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct 
device *dev,
dma_buf_is_dynamic(dmabuf)) {
struct sg_table *sgt;
 
+   dma_resv_lock(attach->dmabuf->resv, NULL);
if (dma_buf_is_dynamic(attach->dmabuf)) {
-   dma_resv_lock(attach->dmabuf->resv, NULL);
ret = dmabuf->ops->pin(attach);
if (ret)
goto err_unlock;
@@ -873,8 +873,7 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct 
device *dev,
ret = PTR_ERR(sgt);
goto err_unpin;
}
-   if (dma_buf_is_dynamic(attach->dmabuf))
-   dma_resv_unlock(attach->dmabuf->resv);
+   dma_resv_unlock(attach->dmabuf->resv);
attach->sgt = sgt;
attach->dir = DMA_BIDIRECTIONAL;
}
@@ -890,8 +889,7 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct 
device *dev,
dmabuf->ops->unpin(attach);
 
 err_unlock:
-   if (dma_buf_is_dynamic(attach->dmabuf))
-   dma_resv_unlock(attach->dmabuf->resv);
+   dma_resv_unlock(attach->dmabuf->resv);
 
dma_buf_detach(dmabuf, attach);
return ERR_PTR(ret);
@@ -937,21 +935,19 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct 
dma_buf_attachment *attach)
if (WARN_ON(!dmabuf || !attach))
return;
 
+   dma_resv_lock(attach->dmabuf->resv, NULL);
+
if (attach->sgt) {
-   if (dma_buf_is_dynamic(attach->dmabuf))
-   dma_resv_lock(attach->dmabuf->resv, NULL);
 
__unmap_dma_buf(attach, attach->sgt, attach->dir);
 
-   if (dma_buf_is_dynamic(attach->dmabuf)) {
+   if (dma_buf_is_dynamic(attach->dmabuf))
dmabuf->ops->unpin(attach);
-   dma_resv_unlock(attach->dmabuf->resv);
-   }
}
-
-   dma_resv_lock(dmabuf->resv, NULL);
list_del(>node);
+
dma_resv_unlock(dmabuf->resv);
+
if (dmabuf->ops->detach)
dmabuf->ops->detach(dmabuf, attach);
 
-- 
2.37.3

[PATCH v7 19/21] dma-buf: Document dynamic locking convention

2022-10-17 Thread Dmitry Osipenko

Add documentation for the dynamic locking convention. The documentation
tells dma-buf API users when they should take the reservation lock and
when not.

Acked-by: Sumit Semwal 
Reviewed-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 Documentation/driver-api/dma-buf.rst |  6 +++
 drivers/dma-buf/dma-buf.c| 64 
 2 files changed, 70 insertions(+)

diff --git a/Documentation/driver-api/dma-buf.rst 
b/Documentation/driver-api/dma-buf.rst
index 36a76cbe9095..622b8156d212 100644
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@@ -119,6 +119,12 @@ DMA Buffer ioctls
 
 .. kernel-doc:: include/uapi/linux/dma-buf.h
 
+DMA-BUF locking convention
+~
+
+.. kernel-doc:: drivers/dma-buf/dma-buf.c
+   :doc: locking convention
+
 Kernel Functions and Structures Reference
 ~
 
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f149b384f4dd..f1d968e5bac4 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -795,6 +795,70 @@ static struct sg_table * __map_dma_buf(struct 
dma_buf_attachment *attach,
return sg_table;
 }
 
+/**
+ * DOC: locking convention
+ *
+ * In order to avoid deadlock situations between dma-buf exporters and importers,
+ * all dma-buf API users must follow the common dma-buf locking convention.
+ *
+ * Convention for importers
+ *
+ * 1. Importers must hold the dma-buf reservation lock when calling these
+ *functions:
+ *
+ * - dma_buf_pin()
+ * - dma_buf_unpin()
+ * - dma_buf_map_attachment()
+ * - dma_buf_unmap_attachment()
+ * - dma_buf_vmap()
+ * - dma_buf_vunmap()
+ *
+ * 2. Importers must not hold the dma-buf reservation lock when calling these
+ *functions:
+ *
+ * - dma_buf_attach()
+ * - dma_buf_dynamic_attach()
+ * - dma_buf_detach()
+ * - dma_buf_export()
+ * - dma_buf_fd()
+ * - dma_buf_get()
+ * - dma_buf_put()
+ * - dma_buf_mmap()
+ * - dma_buf_begin_cpu_access()
+ * - dma_buf_end_cpu_access()
+ * - dma_buf_map_attachment_unlocked()
+ * - dma_buf_unmap_attachment_unlocked()
+ * - dma_buf_vmap_unlocked()
+ * - dma_buf_vunmap_unlocked()
+ *
+ * Convention for exporters
+ *
+ * 1. These &dma_buf_ops callbacks are invoked with unlocked dma-buf
+ *reservation and exporter can take the lock:
+ *
+ * - &dma_buf_ops.attach()
+ * - &dma_buf_ops.detach()
+ * - &dma_buf_ops.release()
+ * - &dma_buf_ops.begin_cpu_access()
+ * - &dma_buf_ops.end_cpu_access()
+ *
+ * 2. These &dma_buf_ops callbacks are invoked with locked dma-buf
+ *reservation and exporter can't take the lock:
+ *
+ * - &dma_buf_ops.pin()
+ * - &dma_buf_ops.unpin()
+ * - &dma_buf_ops.map_dma_buf()
+ * - &dma_buf_ops.unmap_dma_buf()
+ * - &dma_buf_ops.mmap()
+ * - &dma_buf_ops.vmap()
+ * - &dma_buf_ops.vunmap()
+ *
+ * 3. Exporters must hold the dma-buf reservation lock when calling these
+ *functions:
+ *
+ * - dma_buf_move_notify()
+ */
+
 /**
  * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list
  * @dmabuf:[in]buffer to attach device to.
-- 
2.37.3

[PATCH v7 20/21] media: videobuf2: Stop using internal dma-buf lock

2022-10-17 Thread Dmitry Osipenko

All drivers that use dma-bufs have been moved to the updated locking
specification and now dma-buf reservation is guaranteed to be locked
by importers during the mapping operations. There is no need to take
the internal dma-buf lock anymore. Remove locking from the videobuf2
memory allocators.

Acked-by: Tomasz Figa 
Acked-by: Hans Verkuil 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/media/common/videobuf2/videobuf2-dma-contig.c | 11 +--
 drivers/media/common/videobuf2/videobuf2-dma-sg.c | 11 +--
 drivers/media/common/videobuf2/videobuf2-vmalloc.c| 11 +--
 3 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c 
b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
index 79f4d8301fbb..555bd40fa472 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
@@ -382,18 +382,12 @@ static struct sg_table *vb2_dc_dmabuf_ops_map(
struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir)
 {
struct vb2_dc_attachment *attach = db_attach->priv;
-   /* stealing dmabuf mutex to serialize map/unmap operations */
-   struct mutex *lock = _attach->dmabuf->lock;
struct sg_table *sgt;
 
-   mutex_lock(lock);
-
sgt = >sgt;
/* return previously mapped sg table */
-   if (attach->dma_dir == dma_dir) {
-   mutex_unlock(lock);
+   if (attach->dma_dir == dma_dir)
return sgt;
-   }
 
/* release any previous cache */
if (attach->dma_dir != DMA_NONE) {
@@ -409,14 +403,11 @@ static struct sg_table *vb2_dc_dmabuf_ops_map(
if (dma_map_sgtable(db_attach->dev, sgt, dma_dir,
DMA_ATTR_SKIP_CPU_SYNC)) {
pr_err("failed to map scatterlist\n");
-   mutex_unlock(lock);
return ERR_PTR(-EIO);
}
 
attach->dma_dir = dma_dir;
 
-   mutex_unlock(lock);
-
return sgt;
 }
 
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c 
b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index 36ecdea8d707..36981a5b5c53 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -424,18 +424,12 @@ static struct sg_table *vb2_dma_sg_dmabuf_ops_map(
struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir)
 {
struct vb2_dma_sg_attachment *attach = db_attach->priv;
-   /* stealing dmabuf mutex to serialize map/unmap operations */
-   struct mutex *lock = _attach->dmabuf->lock;
struct sg_table *sgt;
 
-   mutex_lock(lock);
-
sgt = >sgt;
/* return previously mapped sg table */
-   if (attach->dma_dir == dma_dir) {
-   mutex_unlock(lock);
+   if (attach->dma_dir == dma_dir)
return sgt;
-   }
 
/* release any previous cache */
if (attach->dma_dir != DMA_NONE) {
@@ -446,14 +440,11 @@ static struct sg_table *vb2_dma_sg_dmabuf_ops_map(
/* mapping to the client with new direction */
if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, 0)) {
pr_err("failed to map scatterlist\n");
-   mutex_unlock(lock);
return ERR_PTR(-EIO);
}
 
attach->dma_dir = dma_dir;
 
-   mutex_unlock(lock);
-
return sgt;
 }
 
diff --git a/drivers/media/common/videobuf2/videobuf2-vmalloc.c 
b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
index 7831bf545874..41db707e43a4 100644
--- a/drivers/media/common/videobuf2/videobuf2-vmalloc.c
+++ b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
@@ -267,18 +267,12 @@ static struct sg_table *vb2_vmalloc_dmabuf_ops_map(
struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir)
 {
struct vb2_vmalloc_attachment *attach = db_attach->priv;
-   /* stealing dmabuf mutex to serialize map/unmap operations */
-   struct mutex *lock = _attach->dmabuf->lock;
struct sg_table *sgt;
 
-   mutex_lock(lock);
-
sgt = >sgt;
/* return previously mapped sg table */
-   if (attach->dma_dir == dma_dir) {
-   mutex_unlock(lock);
+   if (attach->dma_dir == dma_dir)
return sgt;
-   }
 
/* release any previous cache */
if (attach->dma_dir != DMA_NONE) {
@@ -289,14 +283,11 @@ static struct sg_table *vb2_vmalloc_dmabuf_ops_map(
/* mapping to the client with new direction */
if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, 0)) {
pr_err("failed to map scatterlist\n");
-   mutex_unlock(lock);
return ERR_PTR(-EIO);
}
 
attach->dma_dir = dma_dir;
 
-   mutex_unlock(lock);
-
return sgt;
 }
 
-- 
2.37.3

[PATCH v7 13/21] media: videobuf2: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare V4L2 memory allocators to the common dynamic dma-buf locking
convention by starting to use the unlocked versions of dma-buf API
functions.

Acked-by: Tomasz Figa 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/media/common/videobuf2/videobuf2-dma-contig.c | 11 ++-
 drivers/media/common/videobuf2/videobuf2-dma-sg.c |  8 
 drivers/media/common/videobuf2/videobuf2-vmalloc.c|  6 +++---
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c 
b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
index 678b359717c4..79f4d8301fbb 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
@@ -101,7 +101,7 @@ static void *vb2_dc_vaddr(struct vb2_buffer *vb, void 
*buf_priv)
if (buf->db_attach) {
struct iosys_map map;
 
-   if (!dma_buf_vmap(buf->db_attach->dmabuf, ))
+   if (!dma_buf_vmap_unlocked(buf->db_attach->dmabuf, ))
buf->vaddr = map.vaddr;
 
return buf->vaddr;
@@ -711,7 +711,7 @@ static int vb2_dc_map_dmabuf(void *mem_priv)
}
 
/* get the associated scatterlist for this buffer */
-   sgt = dma_buf_map_attachment(buf->db_attach, buf->dma_dir);
+   sgt = dma_buf_map_attachment_unlocked(buf->db_attach, buf->dma_dir);
if (IS_ERR(sgt)) {
pr_err("Error getting dmabuf scatterlist\n");
return -EINVAL;
@@ -722,7 +722,8 @@ static int vb2_dc_map_dmabuf(void *mem_priv)
if (contig_size < buf->size) {
pr_err("contiguous chunk is too small %lu/%lu\n",
   contig_size, buf->size);
-   dma_buf_unmap_attachment(buf->db_attach, sgt, buf->dma_dir);
+   dma_buf_unmap_attachment_unlocked(buf->db_attach, sgt,
+ buf->dma_dir);
return -EFAULT;
}
 
@@ -750,10 +751,10 @@ static void vb2_dc_unmap_dmabuf(void *mem_priv)
}
 
if (buf->vaddr) {
-   dma_buf_vunmap(buf->db_attach->dmabuf, );
+   dma_buf_vunmap_unlocked(buf->db_attach->dmabuf, );
buf->vaddr = NULL;
}
-   dma_buf_unmap_attachment(buf->db_attach, sgt, buf->dma_dir);
+   dma_buf_unmap_attachment_unlocked(buf->db_attach, sgt, buf->dma_dir);
 
buf->dma_addr = 0;
buf->dma_sgt = NULL;
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c 
b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index fa69158a65b1..36ecdea8d707 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -309,7 +309,7 @@ static void *vb2_dma_sg_vaddr(struct vb2_buffer *vb, void 
*buf_priv)
 
if (!buf->vaddr) {
if (buf->db_attach) {
-   ret = dma_buf_vmap(buf->db_attach->dmabuf, );
+   ret = dma_buf_vmap_unlocked(buf->db_attach->dmabuf, 
);
buf->vaddr = ret ? NULL : map.vaddr;
} else {
buf->vaddr = vm_map_ram(buf->pages, buf->num_pages, -1);
@@ -565,7 +565,7 @@ static int vb2_dma_sg_map_dmabuf(void *mem_priv)
}
 
/* get the associated scatterlist for this buffer */
-   sgt = dma_buf_map_attachment(buf->db_attach, buf->dma_dir);
+   sgt = dma_buf_map_attachment_unlocked(buf->db_attach, buf->dma_dir);
if (IS_ERR(sgt)) {
pr_err("Error getting dmabuf scatterlist\n");
return -EINVAL;
@@ -594,10 +594,10 @@ static void vb2_dma_sg_unmap_dmabuf(void *mem_priv)
}
 
if (buf->vaddr) {
-   dma_buf_vunmap(buf->db_attach->dmabuf, );
+   dma_buf_vunmap_unlocked(buf->db_attach->dmabuf, );
buf->vaddr = NULL;
}
-   dma_buf_unmap_attachment(buf->db_attach, sgt, buf->dma_dir);
+   dma_buf_unmap_attachment_unlocked(buf->db_attach, sgt, buf->dma_dir);
 
buf->dma_sgt = NULL;
 }
diff --git a/drivers/media/common/videobuf2/videobuf2-vmalloc.c 
b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
index 948152f1596b..7831bf545874 100644
--- a/drivers/media/common/videobuf2/videobuf2-vmalloc.c
+++ b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
@@ -376,7 +376,7 @@ static int vb2_vmalloc_map_dmabuf(void *mem_priv)
struct iosys_map map;
int ret;
 
-   ret = dma_buf_vmap(buf->dbuf, );
+   ret = dma_buf_vmap_unlocked(buf->dbuf, );
if (ret)
return -EFAULT;
buf->vaddr = map.vaddr;
@@ -389,7 +389,7 @@ static void vb2_vmalloc_unmap_dmabuf(void *mem_priv)
struct vb2_vmalloc_buf *buf = mem_priv;
struct iosys_map map = IOSYS_MAP_INIT_VADDR(buf->vaddr);
 
-   dma_buf_vunmap(buf->dbuf, );
+   dma_buf_vunmap_unlocked(buf->dbuf, );
buf->vaddr = NULL;
 }
 
@@

[PATCH v7 11/21] misc: fastrpc: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare fastrpc to the common dynamic dma-buf locking convention by
starting to use the unlocked versions of dma-buf API functions.

Acked-by: Christian König 
Acked-by: Srinivas Kandagatla 
Signed-off-by: Dmitry Osipenko 
---
 drivers/misc/fastrpc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 93ebd174d848..6fcfb2e9f7a7 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -310,8 +310,8 @@ static void fastrpc_free_map(struct kref *ref)
return;
}
}
-   dma_buf_unmap_attachment(map->attach, map->table,
-DMA_BIDIRECTIONAL);
+   dma_buf_unmap_attachment_unlocked(map->attach, map->table,
+ DMA_BIDIRECTIONAL);
dma_buf_detach(map->buf, map->attach);
dma_buf_put(map->buf);
}
@@ -726,7 +726,7 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int 
fd,
goto attach_err;
}
 
-   map->table = dma_buf_map_attachment(map->attach, DMA_BIDIRECTIONAL);
+   map->table = dma_buf_map_attachment_unlocked(map->attach, 
DMA_BIDIRECTIONAL);
if (IS_ERR(map->table)) {
err = PTR_ERR(map->table);
goto map_err;
-- 
2.37.3

[PATCH v7 10/21] RDMA/umem: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare InfiniBand drivers to the common dynamic dma-buf locking
convention by starting to use the unlocked versions of dma-buf API
functions.

Acked-by: Jason Gunthorpe 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/infiniband/core/umem_dmabuf.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/core/umem_dmabuf.c 
b/drivers/infiniband/core/umem_dmabuf.c
index 04c04e6d24c3..43b26bc12288 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -26,7 +26,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf 
*umem_dmabuf)
if (umem_dmabuf->sgt)
goto wait_fence;
 
-   sgt = dma_buf_map_attachment(umem_dmabuf->attach, DMA_BIDIRECTIONAL);
+   sgt = dma_buf_map_attachment_unlocked(umem_dmabuf->attach,
+ DMA_BIDIRECTIONAL);
if (IS_ERR(sgt))
return PTR_ERR(sgt);
 
@@ -102,8 +103,8 @@ void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf 
*umem_dmabuf)
umem_dmabuf->last_sg_trim = 0;
}
 
-   dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
-DMA_BIDIRECTIONAL);
+   dma_buf_unmap_attachment_unlocked(umem_dmabuf->attach, umem_dmabuf->sgt,
+ DMA_BIDIRECTIONAL);
 
umem_dmabuf->sgt = NULL;
 }
-- 
2.37.3

[PATCH v7 14/21] media: tegra-vde: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare Tegra video decoder driver to the common dynamic dma-buf
locking convention by starting to use the unlocked versions of dma-buf
API functions.

Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/media/platform/nvidia/tegra-vde/dmabuf-cache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/nvidia/tegra-vde/dmabuf-cache.c 
b/drivers/media/platform/nvidia/tegra-vde/dmabuf-cache.c
index 69c346148070..1c5b94989aec 100644
--- a/drivers/media/platform/nvidia/tegra-vde/dmabuf-cache.c
+++ b/drivers/media/platform/nvidia/tegra-vde/dmabuf-cache.c
@@ -38,7 +38,7 @@ static void tegra_vde_release_entry(struct 
tegra_vde_cache_entry *entry)
if (entry->vde->domain)
tegra_vde_iommu_unmap(entry->vde, entry->iova);
 
-   dma_buf_unmap_attachment(entry->a, entry->sgt, entry->dma_dir);
+   dma_buf_unmap_attachment_unlocked(entry->a, entry->sgt, entry->dma_dir);
dma_buf_detach(dmabuf, entry->a);
dma_buf_put(dmabuf);
 
@@ -102,7 +102,7 @@ int tegra_vde_dmabuf_cache_map(struct tegra_vde *vde,
goto err_unlock;
}
 
-   sgt = dma_buf_map_attachment(attachment, dma_dir);
+   sgt = dma_buf_map_attachment_unlocked(attachment, dma_dir);
if (IS_ERR(sgt)) {
dev_err(dev, "Failed to get dmabufs sg_table\n");
err = PTR_ERR(sgt);
@@ -152,7 +152,7 @@ int tegra_vde_dmabuf_cache_map(struct tegra_vde *vde,
 err_free:
kfree(entry);
 err_unmap:
-   dma_buf_unmap_attachment(attachment, sgt, dma_dir);
+   dma_buf_unmap_attachment_unlocked(attachment, sgt, dma_dir);
 err_detach:
dma_buf_detach(dmabuf, attachment);
 err_unlock:
-- 
2.37.3

[PATCH v7 08/21] drm/tegra: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare Tegra DRM driver to the common dynamic dma-buf locking convention
by starting to use the unlocked versions of dma-buf API functions.

Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/tegra/gem.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 81991090adcc..b09b8ab40ae4 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -84,7 +84,7 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device 
*dev, struct host1x_
goto free;
}
 
-   map->sgt = dma_buf_map_attachment(map->attach, direction);
+   map->sgt = dma_buf_map_attachment_unlocked(map->attach, 
direction);
if (IS_ERR(map->sgt)) {
dma_buf_detach(buf, map->attach);
err = PTR_ERR(map->sgt);
@@ -160,7 +160,8 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device 
*dev, struct host1x_
 static void tegra_bo_unpin(struct host1x_bo_mapping *map)
 {
if (map->attach) {
-   dma_buf_unmap_attachment(map->attach, map->sgt, map->direction);
+   dma_buf_unmap_attachment_unlocked(map->attach, map->sgt,
+ map->direction);
dma_buf_detach(map->attach->dmabuf, map->attach);
} else {
dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
@@ -181,7 +182,7 @@ static void *tegra_bo_mmap(struct host1x_bo *bo)
if (obj->vaddr) {
return obj->vaddr;
} else if (obj->gem.import_attach) {
-   ret = dma_buf_vmap(obj->gem.import_attach->dmabuf, );
+   ret = dma_buf_vmap_unlocked(obj->gem.import_attach->dmabuf, 
);
return ret ? NULL : map.vaddr;
} else {
return vmap(obj->pages, obj->num_pages, VM_MAP,
@@ -197,7 +198,7 @@ static void tegra_bo_munmap(struct host1x_bo *bo, void 
*addr)
if (obj->vaddr)
return;
else if (obj->gem.import_attach)
-   dma_buf_vunmap(obj->gem.import_attach->dmabuf, );
+   dma_buf_vunmap_unlocked(obj->gem.import_attach->dmabuf, );
else
vunmap(addr);
 }
@@ -461,7 +462,7 @@ static struct tegra_bo *tegra_bo_import(struct drm_device 
*drm,
 
get_dma_buf(buf);
 
-   bo->sgt = dma_buf_map_attachment(attach, DMA_TO_DEVICE);
+   bo->sgt = dma_buf_map_attachment_unlocked(attach, DMA_TO_DEVICE);
if (IS_ERR(bo->sgt)) {
err = PTR_ERR(bo->sgt);
goto detach;
@@ -479,7 +480,7 @@ static struct tegra_bo *tegra_bo_import(struct drm_device 
*drm,
 
 detach:
if (!IS_ERR_OR_NULL(bo->sgt))
-   dma_buf_unmap_attachment(attach, bo->sgt, DMA_TO_DEVICE);
+   dma_buf_unmap_attachment_unlocked(attach, bo->sgt, 
DMA_TO_DEVICE);
 
dma_buf_detach(buf, attach);
dma_buf_put(buf);
@@ -508,8 +509,8 @@ void tegra_bo_free_object(struct drm_gem_object *gem)
tegra_bo_iommu_unmap(tegra, bo);
 
if (gem->import_attach) {
-   dma_buf_unmap_attachment(gem->import_attach, bo->sgt,
-DMA_TO_DEVICE);
+   dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt,
+ DMA_TO_DEVICE);
drm_prime_gem_destroy(gem, NULL);
} else {
tegra_bo_free(gem->dev, bo);
-- 
2.37.3

[PATCH v7 09/21] drm/etnaviv: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare Etnaviv driver to the common dynamic dma-buf locking convention
by starting to use the unlocked versions of dma-buf API functions.

Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c 
b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index 3fa2da149639..7031db145a77 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -65,7 +65,7 @@ static void etnaviv_gem_prime_release(struct 
etnaviv_gem_object *etnaviv_obj)
struct iosys_map map = IOSYS_MAP_INIT_VADDR(etnaviv_obj->vaddr);
 
if (etnaviv_obj->vaddr)
-   dma_buf_vunmap(etnaviv_obj->base.import_attach->dmabuf, );
+   
dma_buf_vunmap_unlocked(etnaviv_obj->base.import_attach->dmabuf, );
 
/* Don't drop the pages for imported dmabuf, as they are not
 * ours, just free the array we allocated:
-- 
2.37.3

[PATCH v7 07/21] drm/omapdrm: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare OMAP DRM driver to the common dynamic dma-buf locking convention
by starting to use the unlocked versions of dma-buf API functions.

Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c 
b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index 393f82e26927..8e194dbc9506 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -125,7 +125,7 @@ struct drm_gem_object *omap_gem_prime_import(struct 
drm_device *dev,
 
get_dma_buf(dma_buf);
 
-   sgt = dma_buf_map_attachment(attach, DMA_TO_DEVICE);
+   sgt = dma_buf_map_attachment_unlocked(attach, DMA_TO_DEVICE);
if (IS_ERR(sgt)) {
ret = PTR_ERR(sgt);
goto fail_detach;
@@ -142,7 +142,7 @@ struct drm_gem_object *omap_gem_prime_import(struct 
drm_device *dev,
return obj;
 
 fail_unmap:
-   dma_buf_unmap_attachment(attach, sgt, DMA_TO_DEVICE);
+   dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_TO_DEVICE);
 fail_detach:
dma_buf_detach(dma_buf, attach);
dma_buf_put(dma_buf);
-- 
2.37.3

[PATCH v7 12/21] xen/gntdev: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare gntdev driver to the common dynamic dma-buf locking convention
by starting to use the unlocked versions of dma-buf API functions.

Acked-by: Juergen Gross 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/xen/gntdev-dmabuf.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c
index 940e5e9e8a54..4440e626b797 100644
--- a/drivers/xen/gntdev-dmabuf.c
+++ b/drivers/xen/gntdev-dmabuf.c
@@ -600,7 +600,7 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct 
device *dev,
 
gntdev_dmabuf->u.imp.attach = attach;
 
-   sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+   sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
if (IS_ERR(sgt)) {
ret = ERR_CAST(sgt);
goto fail_detach;
@@ -658,7 +658,7 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct 
device *dev,
 fail_end_access:
dmabuf_imp_end_foreign_access(gntdev_dmabuf->u.imp.refs, count);
 fail_unmap:
-   dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
+   dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
 fail_detach:
dma_buf_detach(dma_buf, attach);
 fail_free_obj:
@@ -708,8 +708,8 @@ static int dmabuf_imp_release(struct gntdev_dmabuf_priv 
*priv, u32 fd)
attach = gntdev_dmabuf->u.imp.attach;
 
if (gntdev_dmabuf->u.imp.sgt)
-   dma_buf_unmap_attachment(attach, gntdev_dmabuf->u.imp.sgt,
-DMA_BIDIRECTIONAL);
+   dma_buf_unmap_attachment_unlocked(attach, 
gntdev_dmabuf->u.imp.sgt,
+ DMA_BIDIRECTIONAL);
dma_buf = attach->dmabuf;
dma_buf_detach(attach->dmabuf, attach);
dma_buf_put(dma_buf);
-- 
2.37.3

[PATCH v7 06/21] drm/i915: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare the i915 driver for the common dynamic dma-buf locking convention
by starting to use the unlocked versions of dma-buf API functions
and handling cases where the importer now holds the reservation lock.

Acked-by: Christian König 
Reviewed-by: Michael J. Ruhl 
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 14 ++
 .../gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 16 
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index f5062d0c6333..07eee1c09aaf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -72,7 +72,7 @@ static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf,
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
void *vaddr;
 
-   vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+   vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 85482a04d158..7cab89618bad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -290,7 +290,21 @@ void __i915_gem_object_pages_fini(struct 
drm_i915_gem_object *obj)
__i915_gem_object_free_mmaps(obj);
 
atomic_set(>mm.pages_pin_count, 0);
+
+   /*
+* dma_buf_unmap_attachment() requires reservation to be
+* locked. The imported GEM shouldn't share reservation lock
+* and ttm_bo_cleanup_memtype_use() shouldn't be invoked for
+* dma-buf, so it's safe to take the lock.
+*/
+   if (obj->base.import_attach)
+   i915_gem_object_lock(obj, NULL);
+
__i915_gem_object_put_pages(obj);
+
+   if (obj->base.import_attach)
+   i915_gem_object_unlock(obj);
+
GEM_BUG_ON(i915_gem_object_has_pages(obj));
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 51ed824b020c..f2f3cfad807b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -213,7 +213,7 @@ static int igt_dmabuf_import_same_driver(struct 
drm_i915_private *i915,
goto out_import;
}
 
-   st = dma_buf_map_attachment(import_attach, DMA_BIDIRECTIONAL);
+   st = dma_buf_map_attachment_unlocked(import_attach, DMA_BIDIRECTIONAL);
if (IS_ERR(st)) {
err = PTR_ERR(st);
goto out_detach;
@@ -226,7 +226,7 @@ static int igt_dmabuf_import_same_driver(struct 
drm_i915_private *i915,
timeout = -ETIME;
}
err = timeout > 0 ? 0 : timeout;
-   dma_buf_unmap_attachment(import_attach, st, DMA_BIDIRECTIONAL);
+   dma_buf_unmap_attachment_unlocked(import_attach, st, DMA_BIDIRECTIONAL);
 out_detach:
dma_buf_detach(dmabuf, import_attach);
 out_import:
@@ -296,7 +296,7 @@ static int igt_dmabuf_import(void *arg)
goto out_obj;
}
 
-   err = dma_buf_vmap(dmabuf, );
+   err = dma_buf_vmap_unlocked(dmabuf, );
dma_map = err ? NULL : map.vaddr;
if (!dma_map) {
pr_err("dma_buf_vmap failed\n");
@@ -337,7 +337,7 @@ static int igt_dmabuf_import(void *arg)
 
err = 0;
 out_dma_map:
-   dma_buf_vunmap(dmabuf, );
+   dma_buf_vunmap_unlocked(dmabuf, );
 out_obj:
i915_gem_object_put(obj);
 out_dmabuf:
@@ -358,7 +358,7 @@ static int igt_dmabuf_import_ownership(void *arg)
if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);
 
-   err = dma_buf_vmap(dmabuf, );
+   err = dma_buf_vmap_unlocked(dmabuf, );
ptr = err ? NULL : map.vaddr;
if (!ptr) {
pr_err("dma_buf_vmap failed\n");
@@ -367,7 +367,7 @@ static int igt_dmabuf_import_ownership(void *arg)
}
 
memset(ptr, 0xc5, PAGE_SIZE);
-   dma_buf_vunmap(dmabuf, );
+   dma_buf_vunmap_unlocked(dmabuf, );
 
obj = to_intel_bo(i915_gem_prime_import(>drm, dmabuf));
if (IS_ERR(obj)) {
@@ -418,7 +418,7 @@ static int igt_dmabuf_export_vmap(void *arg)
}
i915_gem_object_put(obj);
 
-   err = dma_buf_vmap(dmabuf, );
+   err = dma_buf_vmap_unlocked(dmabuf, );
ptr = err ? NULL : map.vaddr;
if (!ptr) {
pr_err("dma_buf_vmap failed\n");
@@ -435,7 +435,7 @@ static int igt_dmabuf_export_vmap(void *arg)
memset(ptr, 0xc5, dmabuf->size);
 
err = 0;
-   dma_buf_vunmap(dmabuf, );
+   dma_buf_vunmap_unlocked(dmabuf, );
 out:
dma_buf_put(dmabuf);
return err;
-- 
2.37.3

[PATCH v7 05/21] drm/armada: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare Armada driver to the common dynamic dma-buf locking convention
by starting to use the unlocked versions of dma-buf API functions.

Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/armada/armada_gem.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/armada/armada_gem.c 
b/drivers/gpu/drm/armada/armada_gem.c
index 5430265ad458..26d10065d534 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -66,8 +66,8 @@ void armada_gem_free_object(struct drm_gem_object *obj)
if (dobj->obj.import_attach) {
/* We only ever display imported data */
if (dobj->sgt)
-   dma_buf_unmap_attachment(dobj->obj.import_attach,
-dobj->sgt, DMA_TO_DEVICE);
+   
dma_buf_unmap_attachment_unlocked(dobj->obj.import_attach,
+ dobj->sgt, 
DMA_TO_DEVICE);
drm_prime_gem_destroy(>obj, NULL);
}
 
@@ -539,8 +539,8 @@ int armada_gem_map_import(struct armada_gem_object *dobj)
 {
int ret;
 
-   dobj->sgt = dma_buf_map_attachment(dobj->obj.import_attach,
-  DMA_TO_DEVICE);
+   dobj->sgt = dma_buf_map_attachment_unlocked(dobj->obj.import_attach,
+   DMA_TO_DEVICE);
if (IS_ERR(dobj->sgt)) {
ret = PTR_ERR(dobj->sgt);
dobj->sgt = NULL;
-- 
2.37.3

[PATCH v7 04/21] drm/prime: Prepare to dynamic dma-buf locking specification

2022-10-17 Thread Dmitry Osipenko

Prepare DRM prime core to the common dynamic dma-buf locking convention
by starting to use the unlocked versions of dma-buf API functions.

Reviewed-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/drm_prime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index a3f180653b8b..ef50c4e2e509 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -936,7 +936,7 @@ struct drm_gem_object *drm_gem_prime_import_dev(struct 
drm_device *dev,
 
get_dma_buf(dma_buf);
 
-   sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+   sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
if (IS_ERR(sgt)) {
ret = PTR_ERR(sgt);
goto fail_detach;
@@ -954,7 +954,7 @@ struct drm_gem_object *drm_gem_prime_import_dev(struct 
drm_device *dev,
return obj;
 
 fail_unmap:
-   dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
+   dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
 fail_detach:
dma_buf_detach(dma_buf, attach);
dma_buf_put(dma_buf);
@@ -1052,7 +1052,7 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, 
struct sg_table *sg)
 
attach = obj->import_attach;
if (sg)
-   dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
+   dma_buf_unmap_attachment_unlocked(attach, sg, 
DMA_BIDIRECTIONAL);
dma_buf = attach->dmabuf;
dma_buf_detach(attach->dmabuf, attach);
/* remove the reference */
-- 
2.37.3

[PATCH v7 03/21] drm/gem: Take reservation lock for vmap/vunmap operations

2022-10-17 Thread Dmitry Osipenko

The new common dma-buf locking convention will require buffer importers
to hold the reservation lock around mapping operations. Make the DRM GEM
core take the lock around the vmapping operations and update DRM drivers
to use the locked functions for the case where the DRM core now holds the
lock. This patch prepares the DRM core and drivers for the common dynamic
dma-buf locking convention.

Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/drm_client.c |  4 ++--
 drivers/gpu/drm/drm_gem.c| 24 
 drivers/gpu/drm/drm_gem_dma_helper.c |  6 ++---
 drivers/gpu/drm/drm_gem_framebuffer_helper.c |  6 ++---
 drivers/gpu/drm/drm_gem_ttm_helper.c |  9 +---
 drivers/gpu/drm/lima/lima_sched.c|  4 ++--
 drivers/gpu/drm/panfrost/panfrost_dump.c |  4 ++--
 drivers/gpu/drm/panfrost/panfrost_perfcnt.c  |  6 ++---
 drivers/gpu/drm/qxl/qxl_object.c | 17 +++---
 drivers/gpu/drm/qxl/qxl_prime.c  |  4 ++--
 include/drm/drm_gem.h|  3 +++
 11 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index 2b230b4d6942..fbcb1e995384 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -323,7 +323,7 @@ drm_client_buffer_vmap(struct drm_client_buffer *buffer,
 * fd_install step out of the driver backend hooks, to make that
 * final step optional for internal users.
 */
-   ret = drm_gem_vmap(buffer->gem, map);
+   ret = drm_gem_vmap_unlocked(buffer->gem, map);
if (ret)
return ret;
 
@@ -345,7 +345,7 @@ void drm_client_buffer_vunmap(struct drm_client_buffer 
*buffer)
 {
struct iosys_map *map = >map;
 
-   drm_gem_vunmap(buffer->gem, map);
+   drm_gem_vunmap_unlocked(buffer->gem, map);
 }
 EXPORT_SYMBOL(drm_client_buffer_vunmap);
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 86d670c71286..dbee4863e4f7 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1171,6 +1171,8 @@ int drm_gem_vmap(struct drm_gem_object *obj, struct 
iosys_map *map)
 {
int ret;
 
+   dma_resv_assert_held(obj->resv);
+
if (!obj->funcs->vmap)
return -EOPNOTSUPP;
 
@@ -1186,6 +1188,8 @@ EXPORT_SYMBOL(drm_gem_vmap);
 
 void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
 {
+   dma_resv_assert_held(obj->resv);
+
if (iosys_map_is_null(map))
return;
 
@@ -1197,6 +1201,26 @@ void drm_gem_vunmap(struct drm_gem_object *obj, struct 
iosys_map *map)
 }
 EXPORT_SYMBOL(drm_gem_vunmap);
 
+int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map)
+{
+   int ret;
+
+   dma_resv_lock(obj->resv, NULL);
+   ret = drm_gem_vmap(obj, map);
+   dma_resv_unlock(obj->resv);
+
+   return ret;
+}
+EXPORT_SYMBOL(drm_gem_vmap_unlocked);
+
+void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map)
+{
+   dma_resv_lock(obj->resv, NULL);
+   drm_gem_vunmap(obj, map);
+   dma_resv_unlock(obj->resv);
+}
+EXPORT_SYMBOL(drm_gem_vunmap_unlocked);
+
 /**
  * drm_gem_lock_reservations - Sets up the ww context and acquires
  * the lock on an array of GEM objects.
diff --git a/drivers/gpu/drm/drm_gem_dma_helper.c 
b/drivers/gpu/drm/drm_gem_dma_helper.c
index f6901ff97bbb..1e658c448366 100644
--- a/drivers/gpu/drm/drm_gem_dma_helper.c
+++ b/drivers/gpu/drm/drm_gem_dma_helper.c
@@ -230,7 +230,7 @@ void drm_gem_dma_free(struct drm_gem_dma_object *dma_obj)
 
if (gem_obj->import_attach) {
if (dma_obj->vaddr)
-   dma_buf_vunmap(gem_obj->import_attach->dmabuf, );
+   dma_buf_vunmap_unlocked(gem_obj->import_attach->dmabuf, 
);
drm_prime_gem_destroy(gem_obj, dma_obj->sgt);
} else if (dma_obj->vaddr) {
if (dma_obj->map_noncoherent)
@@ -581,7 +581,7 @@ drm_gem_dma_prime_import_sg_table_vmap(struct drm_device 
*dev,
struct iosys_map map;
int ret;
 
-   ret = dma_buf_vmap(attach->dmabuf, );
+   ret = dma_buf_vmap_unlocked(attach->dmabuf, );
if (ret) {
DRM_ERROR("Failed to vmap PRIME buffer\n");
return ERR_PTR(ret);
@@ -589,7 +589,7 @@ drm_gem_dma_prime_import_sg_table_vmap(struct drm_device 
*dev,
 
obj = drm_gem_dma_prime_import_sg_table(dev, attach, sgt);
if (IS_ERR(obj)) {
-   dma_buf_vunmap(attach->dmabuf, );
+   dma_buf_vunmap_unlocked(attach->dmabuf, );
return obj;
}
 
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c 
b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index 880a4975507f..e35e224e6303 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -354,7 +354,7 @@ int

[PATCH v7 01/21] dma-buf: Add unlocked variant of vmapping functions

2022-10-17 Thread Dmitry Osipenko

Add unlocked variant of dma_buf_vmap/vunmap() that will be utilized
by drivers that don't take the reservation lock explicitly.

Acked-by: Sumit Semwal 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 43 +++
 include/linux/dma-buf.h   |  2 ++
 2 files changed, 45 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index efb4990b29e1..e95fc8dc3aed 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1425,6 +1425,33 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct 
iosys_map *map)
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_vmap, DMA_BUF);
 
+/**
+ * dma_buf_vmap_unlocked - Create virtual mapping for the buffer object into 
kernel
+ * address space. Same restrictions as for vmap and friends apply.
+ * @dmabuf:[in]buffer to vmap
+ * @map:   [out]   returns the vmap pointer
+ *
+ * Unlocked version of dma_buf_vmap()
+ *
+ * Returns 0 on success, or a negative errno code otherwise.
+ */
+int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+   int ret;
+
+   iosys_map_clear(map);
+
+   if (WARN_ON(!dmabuf))
+   return -EINVAL;
+
+   dma_resv_lock(dmabuf->resv, NULL);
+   ret = dma_buf_vmap(dmabuf, map);
+   dma_resv_unlock(dmabuf->resv);
+
+   return ret;
+}
+EXPORT_SYMBOL_NS_GPL(dma_buf_vmap_unlocked, DMA_BUF);
+
 /**
  * dma_buf_vunmap - Unmap a vmap obtained by dma_buf_vmap.
  * @dmabuf:[in]buffer to vunmap
@@ -1449,6 +1476,22 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct 
iosys_map *map)
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap, DMA_BUF);
 
+/**
+ * dma_buf_vunmap_unlocked - Unmap a vmap obtained by dma_buf_vmap.
+ * @dmabuf:[in]buffer to vunmap
+ * @map:   [in]vmap pointer to vunmap
+ */
+void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+   if (WARN_ON(!dmabuf))
+   return;
+
+   dma_resv_lock(dmabuf->resv, NULL);
+   dma_buf_vunmap(dmabuf, map);
+   dma_resv_unlock(dmabuf->resv);
+}
+EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, DMA_BUF);
+
 #ifdef CONFIG_DEBUG_FS
 static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 71731796c8c3..8daa054dd7fe 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -632,4 +632,6 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 unsigned long);
 int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map);
 void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map);
+int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
+void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
 #endif /* __DMA_BUF_H__ */
-- 
2.37.3

[PATCH v7 02/21] dma-buf: Add unlocked variant of attachment-mapping functions

2022-10-17 Thread Dmitry Osipenko

Add unlocked variant of dma_buf_map/unmap_attachment() that will
be used by drivers that don't take the reservation lock explicitly.

Acked-by: Sumit Semwal 
Acked-by: Christian König 
Signed-off-by: Dmitry Osipenko 
---
 drivers/dma-buf/dma-buf.c | 53 +++
 include/linux/dma-buf.h   |  6 +
 2 files changed, 59 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index e95fc8dc3aed..3e4060dadb74 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1100,6 +1100,34 @@ struct sg_table *dma_buf_map_attachment(struct 
dma_buf_attachment *attach,
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment, DMA_BUF);
 
+/**
+ * dma_buf_map_attachment_unlocked - Returns the scatterlist table of the 
attachment;
+ * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
+ * dma_buf_ops.
+ * @attach:[in]attachment whose scatterlist is to be returned
+ * @direction: [in]direction of DMA transfer
+ *
+ * Unlocked variant of dma_buf_map_attachment().
+ */
+struct sg_table *
+dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach,
+   enum dma_data_direction direction)
+{
+   struct sg_table *sg_table;
+
+   might_sleep();
+
+   if (WARN_ON(!attach || !attach->dmabuf))
+   return ERR_PTR(-EINVAL);
+
+   dma_resv_lock(attach->dmabuf->resv, NULL);
+   sg_table = dma_buf_map_attachment(attach, direction);
+   dma_resv_unlock(attach->dmabuf->resv);
+
+   return sg_table;
+}
+EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment_unlocked, DMA_BUF);
+
 /**
  * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
  * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
@@ -1136,6 +1164,31 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment 
*attach,
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment, DMA_BUF);
 
+/**
+ * dma_buf_unmap_attachment_unlocked - unmaps and decreases usecount of the 
buffer;might
+ * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
+ * dma_buf_ops.
+ * @attach:[in]attachment to unmap buffer from
+ * @sg_table:  [in]scatterlist info of the buffer to unmap
+ * @direction: [in]direction of DMA transfer
+ *
+ * Unlocked variant of dma_buf_unmap_attachment().
+ */
+void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach,
+  struct sg_table *sg_table,
+  enum dma_data_direction direction)
+{
+   might_sleep();
+
+   if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
+   return;
+
+   dma_resv_lock(attach->dmabuf->resv, NULL);
+   dma_buf_unmap_attachment(attach, sg_table, direction);
+   dma_resv_unlock(attach->dmabuf->resv);
+}
+EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, DMA_BUF);
+
 /**
  * dma_buf_move_notify - notify attachments that DMA-buf is moving
  *
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 8daa054dd7fe..f11b5bbc2f37 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -627,6 +627,12 @@ int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
   enum dma_data_direction dir);
+struct sg_table *
+dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach,
+   enum dma_data_direction direction);
+void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach,
+  struct sg_table *sg_table,
+  enum dma_data_direction direction);
 
 int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 unsigned long);
-- 
2.37.3

[PATCH v7 00/21] Move all drivers to a common dma-buf locking convention

2022-10-17 Thread Dmitry Osipenko

Hello,

This series moves all drivers to a dynamic dma-buf locking specification.
From now on all dma-buf importers are made responsible for holding
dma-buf's reservation lock around all operations performed over dma-bufs
in accordance with the locking specification. This allows us to utilize
the reservation lock more broadly around the kernel without fear of
potential deadlocks.

This patchset passes all i915 selftests. It was also tested using VirtIO,
Panfrost, Lima, Tegra, udmabuf, AMDGPU and Nouveau drivers. I tested cases
of display+GPU, display+V4L and GPU+V4L dma-buf sharing (where appropriate),
which covers majority of kernel drivers since rest of the drivers share
same or similar code paths.

Changelog:

v7: - Rebased on top of recent drm-misc-next.

- Added ack from Jason Gunthorpe to the RDMA patch.

- Added iosys_map_clear() to dma_buf_vmap_unlocked(), making it fully
  consistent with dma_buf_vmap().

v6: - Added r-b from Michael Ruhl to the i915 patch.

- Added acks from Sumit Semwal and updated commit message of the
  "Move dma_buf_vmap() to dynamic locking specification" patch like
  was suggested by Sumit.

- Added "!dmabuf" check to dma_buf_vmap_unlocked() to match the locked
  variant of the function, for consistency.

v5: - Added acks and r-bs that were given to v4.

- Changed i915 preparation patch like was suggested by Michael Ruhl.
  The scope of reservation locking is smaller now.

v4: - Added dma_buf_mmap() to the "locking convention" documentation,
  which was missed by accident in v3.

- Added acks from Christian König, Tomasz Figa and Hans Verkuil that
  they gave to couple v3 patches.

- Dropped the "_unlocked" postfix from function names that don't have
  the locked variant, as was requested by Christian König.

- Factored out the per-driver preparations into separate patches
  to ease reviewing of the changes, which is now doable without the
  global dma-buf functions renaming.

- Factored out the dynamic locking convention enforcements into separate
  patches which add the final dma_resv_assert_held(dmabuf->resv) to the
  dma-buf API functions.

v3: - Factored out dma_buf_mmap_unlocked() and attachment functions
  into separate patches, like was suggested by Christian König.

- Corrected and factored out dma-buf locking documentation into
  a separate patch, like was suggested by Christian König.

- Intel driver dropped the reservation locking a few days ago from
  its BO-release code path, but we need that locking for the imported
  GEMs because in the end that code path unmaps the imported GEM.
  So I added back the locking needed by the imported GEMs, updating
  the "dma-buf attachment locking specification" patch appropriately.

- Tested Nouveau+Intel dma-buf import/export combo.

- Tested udmabuf import to i915/Nouveau/AMDGPU.

- Fixed a few places in Etnaviv, Panfrost and Lima drivers that I missed
  to switch to locked dma-buf vmapping in the "drm/gem: Take reservation
  lock for vmap/vunmap operations" patch. As a result, this invalidated
  the r-b that Christian gave to v2.

- Added locked dma-buf vmap/vunmap functions that are needed for fixing
  vmapping of Etnaviv, Panfrost and Lima drivers mentioned above.
  I actually had this change stashed for the drm-shmem shrinker patchset,
  but then realized that it's already needed by the dma-buf patches.
  Also improved my tests to better cover these code paths.

v2: - Changed locking specification to avoid problems with a cross-driver
  ww locking, like was suggested by Christian König. Now the attach/detach
  callbacks are invoked without the held lock and exporter should take the
  lock.

- Added "locking convention" documentation that explains which dma-buf
  functions and callbacks are locked/unlocked for importers and exporters,
  which was requested by Christian König.

- Added ack from Tomasz Figa to the V4L patches that he gave to v1.

Dmitry Osipenko (21):
  dma-buf: Add unlocked variant of vmapping functions
  dma-buf: Add unlocked variant of attachment-mapping functions
  drm/gem: Take reservation lock for vmap/vunmap operations
  drm/prime: Prepare to dynamic dma-buf locking specification
  drm/armada: Prepare to dynamic dma-buf locking specification
  drm/i915: Prepare to dynamic dma-buf locking specification
  drm/omapdrm: Prepare to dynamic dma-buf locking specification
  drm/tegra: Prepare to dynamic dma-buf locking specification
  drm/etnaviv: Prepare to dynamic dma-buf locking specification
  RDMA/umem: Prepare to dynamic dma-buf locking specification
  misc: fastrpc: Prepare to dynamic dma-buf locking specification
  xen/gntdev: Prepare to dynamic dma-buf locking specification
  media: videobuf2: Prepare to dynamic dma-buf locking specification
  media: tegra-vde: Prepare to dynamic dma-buf locking specification

Re: [PATCH 4/7] drm/vc4: dpi: Support RGB565 format

2022-10-17 Thread Dave Stevenson

Hi Laurent

On Sat, 15 Oct 2022 at 18:29, Laurent Pinchart
 wrote:
>
> Hi Maxime and Chris,
>
> Thank you for the patch.
>
> On Thu, Oct 13, 2022 at 11:56:48AM +0200, Maxime Ripard wrote:
> > From: Chris Morgan 
> >
> > The RGB565 format with padding over 24 bits
> > (MEDIA_BUS_FMT_RGB565_1X24_CPADHI) is supported by the vc4 DPI
> > controller as "mode 3".  This is what the Geekworm MZP280 DPI display
>
> The code below uses DPI_FORMAT_16BIT_565_RGB_2. Is that mode 3, or
> should the commit message refer to mode 2 ?

It's a mishmash of documentation from the firmware stack.
[1] lists the firmware modes, which start at 1 for the equivalent of
DPI_FORMAT_9BIT_666_RGB, so there padded RGB565 is mode 3.

I'd advocate dropping the reference to which mode it is:
The RGB565 format with padding over 24 bits
(MEDIA_BUS_FMT_RGB565_1X24_CPADHI) is supported by the vc4 DPI controller.

  Dave

[1] 
https://www.raspberrypi.com/documentation/computers/raspberry-pi.html#parallel-display-interface-dpi

> With this fixed,
>
> Reviewed-by: Laurent Pinchart 
>
> > uses, so let's add support for it in the DPI controller driver.
> >
> > Reviewed-by: Dave Stevenson 
> > Signed-off-by: Chris Morgan 
> > Signed-off-by: Maxime Ripard 
> > ---
> >  drivers/gpu/drm/vc4/vc4_dpi.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
> > index 1f8f44b7b5a5..7da3dd1db50e 100644
> > --- a/drivers/gpu/drm/vc4/vc4_dpi.c
> > +++ b/drivers/gpu/drm/vc4/vc4_dpi.c
> > @@ -182,6 +182,10 @@ static void vc4_dpi_encoder_enable(struct drm_encoder 
> > *encoder)
> >   dpi_c |= 
> > VC4_SET_FIELD(DPI_FORMAT_16BIT_565_RGB_3,
> >  DPI_FORMAT);
> >   break;
> > + case MEDIA_BUS_FMT_RGB565_1X24_CPADHI:
> > + dpi_c |= 
> > VC4_SET_FIELD(DPI_FORMAT_16BIT_565_RGB_2,
> > +DPI_FORMAT);
> > + break;
> >   default:
> >   DRM_ERROR("Unknown media bus format %d\n",
> > bus_format);
> >
>
> --
> Regards,
>
> Laurent Pinchart

Re: [PATCH 6/7] drm/vc4: dpi: Change the default DPI format to being 18bpp, not 24.

2022-10-17 Thread Dave Stevenson

Hi Laurent

On Sat, 15 Oct 2022 at 18:14, Laurent Pinchart
 wrote:
>
> Hi Maxime (and Dave),
>
> Thank you for the patch.
>
> On Thu, Oct 13, 2022 at 11:56:50AM +0200, Maxime Ripard wrote:
> > From: Dave Stevenson 
> >
> > DPI hasn't really been used up until now, so the default has
> > been meaningless.
> > In theory we should be able to pass the desired format for the
> > adjacent bridge chip through, but framework seems to be missing
> > for that.
>
> Doesn't the bridge infrastructure allow that ? Or maybe this commit
> message was written a while ago, before it was possible ?

Infrastructure may do, but it isn't always implemented.
Rightly or wrongly, the driver is currently finding the associated
connector, as commented as [1].

The main chain I'm looking at is:
vc4_dpi -> dumb-vga-dac -> vga-connector
Unless I'm missing something, nothing in that chain defines the format
for the output. vga-connector (display-connector driver) tries to ask
the previous bridge for formats via atomic_get_output_bus_fmts and
atomic_get_input_fmts, but dumb-vga-dac (simple-bridge) doesn't
implement them.
So even if we tried following the chain it dies at that point, and currently
the connector has no bus_formats defined in display_info, hence we end
up needing a default.
(Why do I get deja vu with panel-dpi and bus-format discussions?!)

Panels are fine as they do generally have a bus_format defined in display_info.

[1] 
https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/vc4/vc4_dpi.c#L139

> In any case, it would be nice to use the bus format exposed by the next
> bridge in the chain, but that can be done in a subsequent step. The new
> default seems reasonable.

I'll add having a look at atomic_get_output_bus_fmts and
atomic_get_input_fmts to my list of tasks again.

  Dave

> Reviewed-by: Laurent Pinchart 
>
> > As the main device to use DPI is the VGA666 or Adafruit Kippah,
> > both of which use RGB666, change the default to being RGB666 instead
> > of RGB888.
> >
> > Signed-off-by: Dave Stevenson 
> > Signed-off-by: Maxime Ripard 
> > ---
> >  drivers/gpu/drm/vc4/vc4_dpi.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
> > index ecbe4cd87036..fdae02760b6d 100644
> > --- a/drivers/gpu/drm/vc4/vc4_dpi.c
> > +++ b/drivers/gpu/drm/vc4/vc4_dpi.c
> > @@ -150,8 +150,8 @@ static void vc4_dpi_encoder_enable(struct drm_encoder 
> > *encoder)
> >   }
> >   drm_connector_list_iter_end(_iter);
> >
> > - /* Default to 24bit if no connector or format found. */
> > - dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT);
> > + /* Default to 18bit if no connector or format found. */
> > + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_1, DPI_FORMAT);
> >
> >   if (connector) {
> >   if (connector->display_info.num_bus_formats) {
> >
>
> --
> Regards,
>
> Laurent Pinchart

Re: [Intel-gfx] [PATCH v3 14/14] drm/i915/xelpmp: Add multicast steering for media GT

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> MTL's media IP (Xe_LPM+) only has a single type of steering ("OAADDRM")
> which selects between media slice 0 and media slice 1.  We'll always
> steer to media slice 0 unless it is fused off (which is the case when
> VD0, VE0, and SFC0 are all reported as unavailable).
> 
> Bspec: 67789
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 18 --
>  drivers/gpu/drm/i915/gt/intel_gt_types.h|  1 +
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 17 +++--
>  3 files changed, 32 insertions(+), 4 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> index 23a1ef9659bf..0d2811724b00 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> @@ -42,6 +42,7 @@ static const char * const intel_steering_types[] = {
>   "LNCF",
>   "GAM",
>   "DSS",
> + "OADDRM",
>   "INSTANCE 0",
>  };
>  
> @@ -129,6 +130,11 @@ static const struct intel_mmio_range 
> xelpg_dss_steering_table[] = {
>   { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
>  };
>  
> +static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
> + { 0x393200, 0x39323F },
> + { 0x393400, 0x3934FF },
> +};
> +
>  void intel_gt_mcr_init(struct intel_gt *gt)
>  {
>   struct drm_i915_private *i915 = gt->i915;
> @@ -151,8 +157,9 @@ void intel_gt_mcr_init(struct intel_gt *gt)
>   drm_warn(>drm, "mslice mask all zero!\n");
>   }
>  
> - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70) &&
> - gt->type == GT_PRIMARY) {
> + if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
> + gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
> + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
>   fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
>intel_uncore_read(gt->uncore, XEHP_FUSE4));
>  
> @@ -514,6 +521,13 @@ static void get_nonterminated_steering(struct intel_gt 
> *gt,
>   *group = 0;
>   *instance = 0;
>   break;
> + case OADDRM:
> + if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & 
> BIT(0))
> + *group = 0;
> + else
> + *group = 1;
> + *instance = 0;
> + break;
>   default:
>   MISSING_CASE(type);
>   *group = 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 0bb73d110a84..64aa2ba624fc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -61,6 +61,7 @@ enum intel_steering_type {
>   LNCF,
>   GAM,
>   DSS,
> + OADDRM,
>  
>   /*
>* On some platforms there are multiple types of MCR registers that
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 711a31935857..bae960486872 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1598,14 +1598,27 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>   debug_dump_steering(gt);
>  }
>  
> +static void
> +xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
> +{
> + /* FIXME: Actual workarounds will be added in future patch(es) */
> +
> + debug_dump_steering(gt);
> +}
> +
>  static void
>  gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
>  {
>   struct drm_i915_private *i915 = gt->i915;
>  
> - /* FIXME: Media GT handling will be added in an upcoming patch */
> - if (gt->type == GT_MEDIA)
> + if (gt->type == GT_MEDIA) {
> + if (MEDIA_VER(i915) >= 13)
> + xelpmp_gt_workarounds_init(gt, wal);
> + else
> + MISSING_CASE(MEDIA_VER(i915));
> +
>   return;
> + }
>  
>   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>   xelpg_gt_workarounds_init(gt, wal);
> -- 
> 2.37.3
>

Re: [PATCH v3 13/14] drm/i915/xelpg: Add multicast steering

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> MTL's graphics IP (Xe_LPG) once again changes the multicast register
> types and steering details.  Key changes from past platforms:
>  * The number of instances of some MCR types (NODE, OAAL2, and GAM) vary
>according to the MTL subplatform and cannot be read from fuse
>registers.  However steering to instance #0 will always provide a
>non-terminated value, so we can lump these all into a single
>"instance0" table.
>  * The MCR steering register (and its bitfields) has changed.
> 
> Unlike past platforms, we will be explicitly steering all types of MCR
> accesses, including those for "SLICE" and "DSS" ranges; we no longer
> rely on implicit steering.  On previous platforms, various
> hardware/firmware agents that needed to access registers typically had
> their own steering control registers, allowing them to perform multicast
> steering without clobbering the CPU/kernel steering.  Starting with MTL,
> more of these agents now share a single steering register (0xFD4) and it
> is no longer safe for us to assume that the value will remain unchanged
> from how we initialized it during startup.  There is also a slight
> chance of race conditions between the driver and a hardware/firmware
> agent, so the hardware provides a semaphore register that can be used to
> coordinate access to the steering register.  Support for the semaphore
> register will be introduced in a future patch.
> 
> v2:
>  - Use Xe_LPG terminology instead of "MTL 3D" since it's the IP version
>we're matching on now rather than the platform.
>  - Don't combine l3bank and mslice masks into a union.  It's not related
>to the other changes here and we might still need both of them on
>some future platform.
>  - Separate debug dumping of steering settings to a separate helper
>function.  (Tvrtko)
>  - Update debug dumping to include DSS ranges (and future-proof it so
>that any new ranges added on future platforms will also be dumped).
>  - Restore MULTICAST bit at the end of rw_with_mcr_steering_fw() if we
>cleared it.  Also force the MULTICAST bit to true at the beginning of
>multicast writes just to be safe.  (Bala)
> 
> Bspec: 67788, 67112
> Cc: Radhakrishna Sripada 
> Cc: Balasubramani Vivekanandan 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 135 +---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |   5 +
>  drivers/gpu/drm/i915/gt/intel_gt_types.h|   1 +
>  drivers/gpu/drm/i915/gt/intel_workarounds.c |  33 -
>  drivers/gpu/drm/i915/i915_pci.c |   1 +
>  5 files changed, 154 insertions(+), 21 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> index 349074bf365f..23a1ef9659bf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> @@ -41,6 +41,7 @@ static const char * const intel_steering_types[] = {
>   "MSLICE",
>   "LNCF",
>   "GAM",
> + "DSS",
>   "INSTANCE 0",
>  };
>  
> @@ -99,9 +100,40 @@ static const struct intel_mmio_range 
> pvc_instance0_steering_table[] = {
>   {},
>  };
>  
> +static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
> + { 0x000B00, 0x000BFF }, /* SQIDI */
> + { 0x001000, 0x001FFF }, /* SQIDI */
> + { 0x004000, 0x0048FF }, /* GAM */
> + { 0x008700, 0x0087FF }, /* SQIDI */
> + { 0x00B000, 0x00B0FF }, /* NODE */
> + { 0x00C800, 0x00CFFF }, /* GAM */
> + { 0x00D880, 0x00D8FF }, /* NODE */
> + { 0x00DD00, 0x00DDFF }, /* OAAL2 */
> + {},
> +};
> +
> +static const struct intel_mmio_range xelpg_l3bank_steering_table[] = {
> + { 0x00B100, 0x00B3FF },
> + {},
> +};
> +
> +/* DSS steering is used for SLICE ranges as well */
> +static const struct intel_mmio_range xelpg_dss_steering_table[] = {
> + { 0x005200, 0x0052FF }, /* SLICE */
> + { 0x005500, 0x007FFF }, /* SLICE */
> + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS 
> (0x8150-0x815F) */
> + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS 
> (0x9520-0x955F) */
> + { 0x009680, 0x0096FF }, /* DSS */
> + { 0x00D800, 0x00D87F }, /* SLICE */
> + { 0x00DC00, 0x00DCFF }, /* SLICE */
> + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
> +};
> +
>  void intel_gt_mcr_init(struct intel_gt *gt)
>  {
>   struct drm_i915_private *i915 = gt->i915;
> + unsigned long fuse;
> + int i;
>  
>   /*
>* An mslice is unavailable only if both the meml3 for the slice is
> @@ -119,7 +151,22 @@ void intel_gt_mcr_init(struct intel_gt *gt)
>   drm_warn(>drm, "mslice mask all zero!\n");
>   }
>  
> - if (IS_PONTEVECCHIO(i915)) {
> + if

Re: [Intel-gfx] [PATCH v3 12/14] drm/i915: Define multicast registers as a new type

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Rather than treating multicast registers as 'i915_reg_t' let's define
> them as a completely new type.  This will allow the compiler to help us
> make sure we're using multicast-aware functions to operate on multicast
> registers.
> 
> This plan does break down a bit in places where we're just maintaining
> heterogeneous lists of registers (e.g., various MMIO whitelists used by
> perf, GVT, etc.) rather than performing reads/writes.  We only really
> care about the offset in those cases, so for now we can "cast" the
> registers as non-MCR, leaving us with a list of i915_reg_t's, but we may
> want to look for better ways to store mixed collections of i915_reg_t
> and i915_mcr_reg_t in the future.
> 
> v2:
>  - Add TLB invalidation registers
> v3:
>  - Make type checking of i915_mmio_reg_offset() stricter.  It will
>accept either i915_reg_t or i915_mcr_reg_t, but will now raise a
>compile error if any other type is passed, even if that type contains
>a 'reg' field.  (Jani)
>  - Drop a ton of GVT changes; allowing i915_mmio_reg_offset() to take
>either an i915_reg_t or an i915_mcr_reg_t means that the huge lists
>of MMIO_D*() macros used in GVT will continue to work without
>modification.  We need only make changes to structures that have an
>explicit i915_reg_t in them now.
> 
> Cc: Jani Nikula 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c| 16 --
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.c| 51 ---
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.h| 18 +++
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 27 +++---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c   | 32 ++--
>  .../gpu/drm/i915/gt/intel_workarounds_types.h |  5 +-
>  .../gpu/drm/i915/gt/selftest_workarounds.c|  2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c|  2 +-
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.c|  4 +-
>  drivers/gpu/drm/i915/gvt/handlers.c   |  2 +-
>  drivers/gpu/drm/i915/gvt/mmio_context.c   | 14 ++---
>  drivers/gpu/drm/i915/i915_reg_defs.h  | 27 +-
>  12 files changed, 117 insertions(+), 83 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 3df0d0336dbc..27dbb9e4bd6c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -991,7 +991,10 @@ void intel_gt_info_print(const struct intel_gt_info 
> *info,
>  }
>  
>  struct reg_and_bit {
> - i915_reg_t reg;
> + union {
> + i915_reg_t reg;
> + i915_mcr_reg_t mcr_reg;
> + };
>   u32 bit;
>  };
>  
> @@ -1033,7 +1036,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, 
> const bool gen8,
>  static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
>  {
>   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> - return intel_gt_mcr_wait_for_reg_fw(gt, rb.reg, rb.bit, 0,
> + return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
>   TLB_INVAL_TIMEOUT_US,
>   TLB_INVAL_TIMEOUT_MS);
>   else
> @@ -1058,7 +1061,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
>   [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
>   };
> - static const i915_reg_t xehp_regs[] = {
> + static const i915_mcr_reg_t xehp_regs[] = {
>   [RENDER_CLASS]  = XEHP_GFX_TLB_INV_CR,
>   [VIDEO_DECODE_CLASS]= XEHP_VD_TLB_INV_CR,
>   [VIDEO_ENHANCEMENT_CLASS]   = XEHP_VE_TLB_INV_CR,
> @@ -1131,7 +1134,12 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   for_each_engine_masked(engine, gt, awake, tmp) {
>   struct reg_and_bit rb;
>  
> - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> + rb.mcr_reg = xehp_regs[engine->class];
> + rb.bit = BIT(engine->instance);
> + } else {
> + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, 
> num);
> + }
>  
>   if (wait_for_invalidate(gt, rb))
>   drm_err_ratelimited(>i915->drm,
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> index 1ed9bc4dccfd..349074bf365f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> @@ -150,6 +150,19 @@ void intel_gt_mcr_init(struct intel_gt *gt)
>   }
>  }
>  
> +/*
> + * Although the rest of the driver should use MCR-specific functions to
> + * read/write MCR registers, we still use the

Re: [PATCH] drm/amdkfd: Fix type of reset_type parameter in hqd_destroy() callback

2022-10-17 Thread Alex Deucher

Applied.  Thanks!

Alex

On Mon, Oct 17, 2022 at 12:30 PM Nathan Chancellor  wrote:
>
> When booting a kernel compiled with CONFIG_CFI_CLANG on a machine with
> an RX 6700 XT, there is a CFI failure in kfd_destroy_mqd_cp():
>
>   [   12.894543] CFI failure at kfd_destroy_mqd_cp+0x2a/0x40 [amdgpu] 
> (target: hqd_destroy_v10_3+0x0/0x260 [amdgpu]; expected type: 0x8594d794)
>
> Clang's kernel Control Flow Integrity (kCFI) makes sure that all
> indirect call targets have a type that exactly matches the function
> pointer prototype. In this case, hqd_destroy()'s third parameter,
> reset_type, should have a type of 'uint32_t' but every implementation of
> this callback has a third parameter type of 'enum kfd_preempt_type'.
>
> Update the function pointer prototype to match reality so that there is
> no more CFI violation.
>
> Link: https://github.com/ClangBuiltLinux/linux/issues/1738
> Signed-off-by: Nathan Chancellor 
> ---
>
> No Fixes tag, as I could not pin down exactly when this started. I
> suspect it is
>
> Fixes: 70539bd79500 ("drm/amd: Update MEC HQD loading code for KFD")
>
> but I did not want to add that without a second look. Feel free to add
> it during patch application if it makes sense.
>
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index e85364dff4e0..5cb3e8634739 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -262,8 +262,9 @@ struct kfd2kgd_calls {
> uint32_t queue_id);
>
> int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd,
> -   uint32_t reset_type, unsigned int timeout,
> -   uint32_t pipe_id, uint32_t queue_id);
> +   enum kfd_preempt_type reset_type,
> +   unsigned int timeout, uint32_t pipe_id,
> +   uint32_t queue_id);
>
> bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd);
>
>
> base-commit: 9abf2313adc1ca1b6180c508c25f22f9395cc780
> --
> 2.38.0
>

Re: [Intel-gfx] [PATCH v3 11/14] drm/i915/gt: Add MCR-specific workaround initializers

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Let's be more explicit about which of our workarounds are updating MCR
> registers.
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c   | 433 +++---
>  .../gpu/drm/i915/gt/intel_workarounds_types.h |   4 +-
>  2 files changed, 263 insertions(+), 174 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 96b9f02a2284..7671994d5b7a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t 
> reg,
>   _wa_add(wal, );
>  }
>  
> +static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg,
> +u32 clear, u32 set, u32 read_mask, bool masked_reg)
> +{
> + struct i915_wa wa = {
> + .reg  = reg,
> + .clr  = clear,
> + .set  = set,
> + .read = read_mask,
> + .masked_reg = masked_reg,
> + .is_mcr = 1,
> + };
> +
> + _wa_add(wal, );
> +}
> +
>  static void
>  wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 
> set)
>  {
>   wa_add(wal, reg, clear, set, clear, false);
>  }
>  
> +static void
> +wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, 
> u32 set)
> +{
> + wa_mcr_add(wal, reg, clear, set, clear, false);
> +}
> +
>  static void
>  wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
>  {
> @@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, 
> u32 set)
>   wa_write_clr_set(wal, reg, set, set);
>  }
>  
> +static void
> +wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
> +{
> + wa_mcr_write_clr_set(wal, reg, set, set);
> +}
> +
>  static void
>  wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
>  {
>   wa_write_clr_set(wal, reg, clr, 0);
>  }
>  
> +static void
> +wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
> +{
> + wa_mcr_write_clr_set(wal, reg, clr, 0);
> +}
> +
>  /*
>   * WA operations on "masked register". A masked register has the upper 16 
> bits
>   * documented as "masked" in b-spec. Its purpose is to allow writing to just 
> a
> @@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, 
> u32 val)
>   wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
>  }
>  
> +static void
> +wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
> +{
> + wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
> +}
> +
>  static void
>  wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
>  {
>   wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
>  }
>  
> +static void
> +wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
> +{
> + wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
> +}
> +
>  static void
>  wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
>   u32 mask, u32 val)
> @@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t 
> reg,
>   wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
>  }
>  
> +static void
> +wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
> + u32 mask, u32 val)
> +{
> + wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
> +}
> +
>  static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
> struct i915_wa_list *wal)
>  {
> @@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct 
> intel_engine_cs *engine,
>   wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), 
> ASYNC_FLIP_PERF_DISABLE);
>  
>   /* WaDisablePartialInstShootdown:bdw,chv */
> - wa_masked_en(wal, GEN8_ROW_CHICKEN,
> -  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
> + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
> +  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
>  
>   /* Use Force Non-Coherent whenever executing a 3D context. This is a
>* workaround for a possible hang in the unlikely event a TLB
> @@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct 
> intel_engine_cs *engine,
>   gen8_ctx_workarounds_init(engine, wal);
>  
>   /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
> - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
> + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
>  
>   /* WaDisableDopClockGating:bdw
>*
>* Also see the related UCGTCL1 write in bdw_init_clock_gating()
>* to disable EUTC clock gating.
>*/
> - wa_masked_en(wal, GEN8_ROW_CHICKEN2,
> -  DOP_CLOCK_GATING_DISABLE);
> + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
> +

Re: [PATCH v3 10/14] drm/i915/guc: Handle save/restore of MCR registers explicitly

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> MCR registers can be placed on the GuC's save/restore list, but at the
> moment they are always handled in a multicast manner (i.e., the GuC
> reads one instance to save the value and then does a multicast write to
> restore that single value to all instances).  In the future the GuC will
> probably give us an alternate interface to do unicast per-instance
> save/restore operations, so we should be very clear about which
> registers on the list are MCR registers (and in the future which
> save/restore behavior we want for them).
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 55 +-
>  1 file changed, 34 insertions(+), 21 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index cc357fa0c270..de923fb82301 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -278,24 +278,16 @@ __mmio_reg_add(struct temp_regset *regset, struct 
> guc_mmio_reg *reg)
>   return slot;
>  }
>  
> -#define GUC_REGSET_STEERING(group, instance) ( \
> - FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
> - FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
> - GUC_REGSET_NEEDS_STEERING \
> -)
> -
>  static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
> struct temp_regset *regset,
> -   i915_reg_t reg, u32 flags)
> +   u32 offset, u32 flags)
>  {
>   u32 count = regset->storage_used - (regset->registers - 
> regset->storage);
> - u32 offset = i915_mmio_reg_offset(reg);
>   struct guc_mmio_reg entry = {
>   .offset = offset,
>   .flags = flags,
>   };
>   struct guc_mmio_reg *slot;
> - u8 group, inst;
>  
>   /*
>* The mmio list is built using separate lists within the driver.
> @@ -307,17 +299,6 @@ static long __must_check guc_mmio_reg_add(struct 
> intel_gt *gt,
>   sizeof(entry), guc_mmio_reg_cmp))
>   return 0;
>  
> - /*
> -  * The GuC doesn't have a default steering, so we need to explicitly
> -  * steer all registers that need steering. However, we do not keep track
> -  * of all the steering ranges, only of those that have a chance of using
> -  * a non-default steering from the i915 pov. Instead of adding such
> -  * tracking, it is easier to just program the default steering for all
> -  * regs that don't need a non-default one.
> -  */
> - intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
> - entry.flags |= GUC_REGSET_STEERING(group, inst);
> -
>   slot = __mmio_reg_add(regset, &entry);
>   if (IS_ERR(slot))
>   return PTR_ERR(slot);
> @@ -335,6 +316,38 @@ static long __must_check guc_mmio_reg_add(struct 
> intel_gt *gt,
>  
>  #define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
>   guc_mmio_reg_add(gt, \
> +  regset, \
> +  i915_mmio_reg_offset(reg), \
> +  (masked) ? GUC_REGSET_MASKED : 0)
> +
> +#define GUC_REGSET_STEERING(group, instance) ( \
> + FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
> + FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
> + GUC_REGSET_NEEDS_STEERING \
> +)
> +
> +static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
> +  struct temp_regset *regset,
> +  i915_reg_t reg, u32 flags)
> +{
> + u8 group, inst;
> +
> + /*
> +  * The GuC doesn't have a default steering, so we need to explicitly
> +  * steer all registers that need steering. However, we do not keep track
> +  * of all the steering ranges, only of those that have a chance of using
> +  * a non-default steering from the i915 pov. Instead of adding such
> +  * tracking, it is easier to just program the default steering for all
> +  * regs that don't need a non-default one.
> +  */
> + intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
> + flags |= GUC_REGSET_STEERING(group, inst);
> +
> + return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags);
> +}
> +
> +#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \
> + guc_mcr_reg_add(gt, \
>regset, \
>(reg), \
>(masked) ? GUC_REGSET_MASKED : 0)
> @@ -375,7 +388,7 @@ static int guc_mmio_regset_init(struct temp_regset 
> *regset,
>   /* add in local MOCS registers */
>   for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
>   if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
> - ret |= GUC_MMIO_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), 
> false);
> + ret |= GUC_MCR_REG_ADD(gt,

Re: [PATCH v3 09/14] drm/i915/gt: Always use MCR functions on multicast registers

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Rather than relying on the implicit behavior of intel_uncore_*()
> functions, let's always use the intel_gt_mcr_*() functions to operate on
> multicast/replicated registers.
> 
> v2:
>  - Add TLB invalidation registers
> 
> v3:
>  - Switch more uncore operations in mmio_invalidate_full() to MCR
>operations for Xe_HP.  (Bala)
> 
> Cc: Balasubramani Vivekanandan 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c| 58 ---
>  drivers/gpu/drm/i915/gt/intel_mocs.c  | 13 ++---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 12 +++--
>  drivers/gpu/drm/i915/intel_pm.c   | 19 
>  4 files changed, 65 insertions(+), 37 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index e14f159ad9fc..3df0d0336dbc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -1017,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, 
> const bool gen8,
>   return rb;
>  }
>  
> +/*
> + * HW architecture suggest typical invalidation time at 40us,
> + * with pessimistic cases up to 100us and a recommendation to
> + * cap at 1ms. We go a bit higher just in case.
> + */
> +#define TLB_INVAL_TIMEOUT_US 100
> +#define TLB_INVAL_TIMEOUT_MS 4
> +
> +/*
> + * On Xe_HP the TLB invalidation registers are located at the same MMIO 
> offsets
> + * but are now considered MCR registers.  Since they exist within a GAM 
> range,
> + * the primary instance of the register rolls up the status from each unit.
> + */
> +static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
> +{
> + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> + return intel_gt_mcr_wait_for_reg_fw(gt, rb.reg, rb.bit, 0,
> + TLB_INVAL_TIMEOUT_US,
> + TLB_INVAL_TIMEOUT_MS);
> + else
> + return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 
> 0,
> + TLB_INVAL_TIMEOUT_US,
> + TLB_INVAL_TIMEOUT_MS,
> + NULL);
> +}
> +
>  static void mmio_invalidate_full(struct intel_gt *gt)
>  {
>   static const i915_reg_t gen8_regs[] = {
> @@ -1048,7 +1074,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   unsigned int num = 0;
>  
>   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> - regs = xehp_regs;
> + regs = NULL;
>   num = ARRAY_SIZE(xehp_regs);
>   } else if (GRAPHICS_VER(i915) == 12) {
>   regs = gen12_regs;
> @@ -1075,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   if (!intel_engine_pm_is_awake(engine))
>   continue;
>  
> - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> - if (!i915_mmio_reg_offset(rb.reg))
> - continue;
> + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> + intel_gt_mcr_multicast_write_fw(gt,
> + 
> xehp_regs[engine->class],
> + BIT(engine->instance));
> + } else {
> + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, 
> num);
> + if (!i915_mmio_reg_offset(rb.reg))
> + continue;
>  
> - intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> + intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> + }
>   awake |= engine->mask;
>   }
>  
> @@ -1099,22 +1131,12 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   for_each_engine_masked(engine, gt, awake, tmp) {
>   struct reg_and_bit rb;
>  
> - /*
> -  * HW architecture suggest typical invalidation time at 40us,
> -  * with pessimistic cases up to 100us and a recommendation to
> -  * cap at 1ms. We go a bit higher just in case.
> -  */
> - const unsigned int timeout_us = 100;
> - const unsigned int timeout_ms = 4;
> -
>   rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> - if (__intel_wait_for_register_fw(uncore,
> -  rb.reg, rb.bit, 0,
> -  timeout_us, timeout_ms,
> -  NULL))
> +
> + if (wait_for_invalidate(gt, rb))
>   drm_err_ratelimited(&gt->i915->drm,
>   "%s TLB invalidation did not 
> complete in %ums!\n",
> -

Re: [RESEND PATCH] drm/amd/amdgpu: Replace kmap() with kmap_local_page()

2022-10-17 Thread Alex Deucher

Applied.  Thanks!

On Sun, Oct 16, 2022 at 1:42 PM Fabio M. De Francesco
 wrote:
>
> kmap() is being deprecated in favor of kmap_local_page().
>
> There are two main problems with kmap(): (1) It comes with an overhead as
> mapping space is restricted and protected by a global lock for
> synchronization and (2) it also requires global TLB invalidation when the
> kmap’s pool wraps and it might block when the mapping space is fully
> utilized until a slot becomes available.
>
> With kmap_local_page() the mappings are per thread, CPU local, can take
> page faults, and can be called from any context (including interrupts).
> It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
> the tasks can be preempted and, when they are scheduled to run again, the
> kernel virtual addresses are restored and are still valid.
>
> Since its use in amdgpu/amdgpu_ttm.c is safe, it should be preferred.
>
> Therefore, replace kmap() with kmap_local_page() in amdgpu/amdgpu_ttm.c.
>
> Suggested-by: Ira Weiny 
> Acked-by: Christian König 
> Signed-off-by: Fabio M. De Francesco 
> ---
>
> I'm resending because I suspect that this patch might have been lost. In
> the meantime I added an "Acked-by" tag from Christian K. Obviously,
> there are no further changes in the code.
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 3b4c19412625..c11657b5915f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2301,9 +2301,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char 
> __user *buf,
> if (p->mapping != adev->mman.bdev.dev_mapping)
> return -EPERM;
>
> -   ptr = kmap(p);
> +   ptr = kmap_local_page(p);
> r = copy_to_user(buf, ptr + off, bytes);
> -   kunmap(p);
> +   kunmap_local(ptr);
> if (r)
> return -EFAULT;
>
> @@ -2352,9 +2352,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const 
> char __user *buf,
> if (p->mapping != adev->mman.bdev.dev_mapping)
> return -EPERM;
>
> -   ptr = kmap(p);
> +   ptr = kmap_local_page(p);
> r = copy_from_user(ptr + off, buf, bytes);
> -   kunmap(p);
> +   kunmap_local(ptr);
> if (r)
> return -EFAULT;
>
> --
> 2.37.1
>

Re: [PATCH v3 08/14] drm/i915: Define MCR registers explicitly

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Rather than using the same _MMIO() macro to define MCR registers as
> singleton registers, let's use a new MCR_REG() macro to make it clear
> that these registers are special and should be handled accordingly.  For
> now MCR_REG() will still generate an i915_reg_t with the given offset,
> but we'll change that in future patches.
> 
> Bspec: 66673, 66696, 66534, 67609
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 134 
>  1 file changed, 68 insertions(+), 66 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 890960b56b9e..ad9985015b0e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -8,6 +8,8 @@
>  
>  #include "i915_reg_defs.h"
>  
> +#define MCR_REG(offset)  _MMIO(offset)
> +
>  /* RPM unit config (Gen8+) */
>  #define RPM_CONFIG0  _MMIO(0xd00)
>  #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT  3
> @@ -333,12 +335,12 @@
>  #define GEN7_TLB_RD_ADDR _MMIO(0x4700)
>  
>  #define GEN12_PAT_INDEX(index)   _MMIO(0x4800 + (index) 
> * 4)
> -#define XEHP_PAT_INDEX(index)_MMIO(0x4800 + (index) 
> * 4)
> +#define XEHP_PAT_INDEX(index)MCR_REG(0x4800 + 
> (index) * 4)
>  
> -#define XEHP_TILE0_ADDR_RANGE_MMIO(0x4900)
> +#define XEHP_TILE0_ADDR_RANGEMCR_REG(0x4900)
>  #define   XEHP_TILE_LMEM_RANGE_SHIFT 8
>  
> -#define XEHP_FLAT_CCS_BASE_ADDR  _MMIO(0x4910)
> +#define XEHP_FLAT_CCS_BASE_ADDR  MCR_REG(0x4910)
>  #define   XEHP_CCS_BASE_SHIFT8
>  
>  #define GAMTARBMODE  _MMIO(0x4a08)
> @@ -388,18 +390,18 @@
>  #define CHICKEN_RASTER_2 _MMIO(0x6208)
>  #define   TBIMR_FAST_CLIPREG_BIT(5)
>  
> -#define VFLSKPD  _MMIO(0x62a8)
> +#define VFLSKPD  MCR_REG(0x62a8)
>  #define   DIS_OVER_FETCH_CACHE   REG_BIT(1)
>  #define   DIS_MULT_MISS_RD_SQUASHREG_BIT(0)
>  
>  #define GEN12_FF_MODE2   _MMIO(0x6604)
> -#define XEHP_FF_MODE2_MMIO(0x6604)
> +#define XEHP_FF_MODE2MCR_REG(0x6604)
>  #define   FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24)
>  #define   FF_MODE2_GS_TIMER_224  
> REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
>  #define   FF_MODE2_TDS_TIMER_MASKREG_GENMASK(23, 16)
>  #define   FF_MODE2_TDS_TIMER_128 
> REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
>  
> -#define XEHPG_INSTDONE_GEOM_SVG  _MMIO(0x666c)
> +#define XEHPG_INSTDONE_GEOM_SVG  MCR_REG(0x666c)
>  
>  #define CACHE_MODE_0_GEN7_MMIO(0x7000) /* IVB+ */
>  #define   RC_OP_FLUSH_ENABLE (1 << 0)
> @@ -448,14 +450,14 @@
>  #define GEN8_HDC_CHICKEN1_MMIO(0x7304)
>  
>  #define GEN11_COMMON_SLICE_CHICKEN3  _MMIO(0x7304)
> -#define XEHP_COMMON_SLICE_CHICKEN3   _MMIO(0x7304)
> +#define XEHP_COMMON_SLICE_CHICKEN3   MCR_REG(0x7304)
>  #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN   REG_BIT(12)
>  #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE  REG_BIT(12)
>  #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
>  #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
>  
>  #define GEN9_SLICE_COMMON_ECO_CHICKEN1   _MMIO(0x731c)
> -#define XEHP_SLICE_COMMON_ECO_CHICKEN1   _MMIO(0x731c)
> +#define XEHP_SLICE_COMMON_ECO_CHICKEN1   MCR_REG(0x731c)
>  #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
>  #define   GEN11_STATE_CACHE_REDIRECT_TO_CS   (1 << 11)
>  
> @@ -486,7 +488,7 @@
>  
>  #define GEN8_RC6_CTX_INFO_MMIO(0x8504)
>  
> -#define XEHP_SQCM_MMIO(0x8724)
> +#define XEHP_SQCMMCR_REG(0x8724)
>  #define   EN_32B_ACCESS  REG_BIT(30)
>  
>  #define HSW_IDICR_MMIO(0x9008)
> @@ -647,7 +649,7 @@
>  #define GEN7_MISCCPCTL   _MMIO(0x9424)
>  #define   GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0)
>  
> -#define GEN8_MISCCPCTL   _MMIO(0x9424)
> +#define GEN8_MISCCPCTL   MCR_REG(0x9424)
>  #define   GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0)
>  #define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1)
>  #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE   (1 << 2)
> @@ -703,7 +705,7 @@
>  #define   LTCDD_CLKGATE_DIS  REG_BIT(10)
>  
>  #define

Re: [Intel-gfx] [PATCH v3 07/14] drm/i915/gt: Add intel_gt_mcr_wait_for_reg_fw()

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Xe_HP has some MCR registers that need to be polled for completion of
> operations like TLB invalidation.  Those registers are in the GAM range,
> which rolls up the status from each unit into the 'primary' instance's
> value.  This makes it useful to have a dedicated 'wait for register'
> function that handles this on MCR registers, similar to the
> __intel_wait_for_register_fw() function we already have for regular
> registers.
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 55 ++
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.h |  7 
>  2 files changed, 62 insertions(+)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> index 4dc360f4e344..1ed9bc4dccfd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> @@ -568,3 +568,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, 
> unsigned int dss,
>   return;
>   }
>  }
> +
> +/**
> + * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected 
> state
> + * @gt: GT structure
> + * @reg: the register to read
> + * @mask: mask to apply to register value
> + * @value: value to wait for
> + * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait
> + * @slow_timeout_ms: slow timeout in millisecond
> + *
> + * This routine waits until the target register @reg contains the expected
> + * @value after applying the @mask, i.e. it waits until ::
> + *
> + * (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value
> + *
> + * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
> + * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
> + * must be not larger than 20,000 microseconds.
> + *
> + * This function is basically an MCR-friendly version of
> + * __intel_wait_for_register_fw().  Generally this function will only be used
> + * on GAM registers which are a bit special --- although they're MCR 
> registers,
> + * reads (e.g., waiting for status updates) are always directed to the 
> primary
> + * instance.
> + *
> + * Note that this routine assumes the caller holds forcewake asserted, it is
> + * not suitable for very long waits.
> + *
> + * Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
> + */
> +int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
> +  i915_reg_t reg,
> +  u32 mask,
> +  u32 value,
> +  unsigned int fast_timeout_us,
> +  unsigned int slow_timeout_ms)
> +{
> + u32 reg_value = 0;
> +#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == 
> value)
> + int ret;
> +
> + /* Catch any overuse of this function */
> + might_sleep_if(slow_timeout_ms);
> + GEM_BUG_ON(fast_timeout_us > 20000);
> + GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms);
> +
> + ret = -ETIMEDOUT;
> + if (fast_timeout_us && fast_timeout_us <= 20000)
> + ret = _wait_for_atomic(done, fast_timeout_us, 0);
> + if (ret && slow_timeout_ms)
> + ret = wait_for(done, slow_timeout_ms);
> +
> + return ret;
> +#undef done
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
> index 781b267478db..548f922cd9fa 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
> @@ -37,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, 
> struct intel_gt *gt,
>  void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
> unsigned int *group, unsigned int *instance);
>  
> +int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
> +  i915_reg_t reg,
> +  u32 mask,
> +  u32 value,
> +  unsigned int fast_timeout_us,
> +  unsigned int slow_timeout_ms);
> +
>  /*
>   * Helper for for_each_ss_steering loop.  On pre-Xe_HP platforms, subslice
>   * presence is determined by using the group/instance as direct lookups in 
> the
> -- 
> 2.37.3
>

Re: [PATCH v3 06/14] drm/i915/xehp: Check for faults on primary GAM

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> On Xe_HP the fault registers are now in a multicast register range.
> However as part of the GAM these registers follow special rules and we
> need only read from the "primary" GAM's instance to get the information
> we need.  So a single intel_gt_mcr_read_any() (which will automatically
> steer to the primary GAM) is sufficient; we don't need to loop over each
> instance of the MCR register.
> 
> v2:
>  - Update more instances of fault registers.  (Bala)
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c| 52 +++
>  drivers/gpu/drm/i915/i915_gpu_error.c | 12 +--
>  2 files changed, 55 insertions(+), 9 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 445e171940fa..e14f159ad9fc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -270,7 +270,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
>  I915_MASTER_ERROR_INTERRUPT);
>   }
>  
> - if (GRAPHICS_VER(i915) >= 12) {
> + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> + intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
> +RING_FAULT_VALID, 0);
> + intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
> + } else if (GRAPHICS_VER(i915) >= 12) {
>   rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
>   intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
>   } else if (GRAPHICS_VER(i915) >= 8) {
> @@ -308,17 +312,49 @@ static void gen6_check_faults(struct intel_gt *gt)
>   }
>  }
>  
> +static void xehp_check_faults(struct intel_gt *gt)
> +{
> + u32 fault;
> +
> + /*
> +  * Although the fault register now lives in an MCR register range,
> +  * the GAM registers are special and we only truly need to read
> +  * the "primary" GAM instance rather than handling each instance
> +  * individually.  intel_gt_mcr_read_any() will automatically steer
> +  * toward the primary instance.
> +  */
> + fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
> + if (fault & RING_FAULT_VALID) {
> + u32 fault_data0, fault_data1;
> + u64 fault_addr;
> +
> + fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
> + fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
> +
> + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
> +  ((u64)fault_data0 << 12);
> +
> + drm_dbg(&gt->i915->drm, "Unexpected fault\n"
> + "\tAddr: 0x%08x_%08x\n"
> + "\tAddress space: %s\n"
> + "\tEngine ID: %d\n"
> + "\tSource ID: %d\n"
> + "\tType: %d\n",
> + upper_32_bits(fault_addr), lower_32_bits(fault_addr),
> + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
> + GEN8_RING_FAULT_ENGINE_ID(fault),
> + RING_FAULT_SRCID(fault),
> + RING_FAULT_FAULT_TYPE(fault));
> + }
> +}
> +
>  static void gen8_check_faults(struct intel_gt *gt)
>  {
>   struct intel_uncore *uncore = gt->uncore;
>   i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
>   u32 fault;
>  
> - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) {
> - fault_reg = XEHP_RING_FAULT_REG;
> - fault_data0_reg = XEHP_FAULT_TLB_DATA0;
> - fault_data1_reg = XEHP_FAULT_TLB_DATA1;
> - } else if (GRAPHICS_VER(gt->i915) >= 12) {
> + if (GRAPHICS_VER(gt->i915) >= 12) {
>   fault_reg = GEN12_RING_FAULT_REG;
>   fault_data0_reg = GEN12_FAULT_TLB_DATA0;
>   fault_data1_reg = GEN12_FAULT_TLB_DATA1;
> @@ -358,7 +394,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
>   struct drm_i915_private *i915 = gt->i915;
>  
>   /* From GEN8 onwards we only have one 'All Engine Fault Register' */
> - if (GRAPHICS_VER(i915) >= 8)
> + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
> + xehp_check_faults(gt);
> + else if (GRAPHICS_VER(i915) >= 8)
>   gen8_check_faults(gt);
>   else if (GRAPHICS_VER(i915) >= 6)
>   gen6_check_faults(gt);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 9ea2fe34e7d3..f2d53edcd2ee 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1221,7 +1221,10 @@ static void engine_record_registers(struct 
> intel_engine_coredump *ee)
>   if (GRAPHICS_VER(i915) >= 6) {
>   ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
>  
> - if (GRAPHICS_VER(i915) >= 12)
> +

Re: [PATCH] drm/radeon: Replace kmap() with kmap_local_page()

2022-10-17 Thread Alex Deucher

Applied.  Thanks!

On Fri, Oct 14, 2022 at 3:03 AM Christian König
 wrote:
>
> Am 13.10.22 um 23:07 schrieb Fabio M. De Francesco:
> > The use of kmap() is being deprecated in favor of kmap_local_page().
> >
> > There are two main problems with kmap(): (1) It comes with an overhead as
> > the mapping space is restricted and protected by a global lock for
> > synchronization and (2) it also requires global TLB invalidation when the
> > kmap’s pool wraps and it might block when the mapping space is fully
> > utilized until a slot becomes available.
> >
> > With kmap_local_page() the mappings are per thread, CPU local, can take
> > page faults, and can be called from any context (including interrupts).
> > It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
> > the tasks can be preempted and, when they are scheduled to run again, the
> > kernel virtual addresses are restored and still valid.
> >
> > Therefore, replace kmap() with kmap_local_page() in radeon_ttm_gtt_read().
> >
> > Cc: "Venkataramanan, Anirudh" 
> > Suggested-by: Ira Weiny 
> > Signed-off-by: Fabio M. De Francesco 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++--
> >   1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
> > b/drivers/gpu/drm/radeon/radeon_ttm.c
> > index d33fec488713..bdb4c0e0736b 100644
> > --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> > +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> > @@ -869,11 +869,11 @@ static ssize_t radeon_ttm_gtt_read(struct file *f, 
> > char __user *buf,
> >
> >   page = rdev->gart.pages[p];
> >   if (page) {
> > - ptr = kmap(page);
> > + ptr = kmap_local_page(page);
> >   ptr += off;
> >
> >   r = copy_to_user(buf, ptr, cur_size);
> > - kunmap(rdev->gart.pages[p]);
> > + kunmap_local(ptr);
> >   } else
> >   r = clear_user(buf, cur_size);
> >
>

Re: [PATCH v3 05/14] drm/i915/gt: Add intel_gt_mcr_multicast_rmw() operation

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> There are cases where we wish to read from any non-terminated MCR
> register instance (or the primary instance in the case of GAM ranges),
> clear/set some bits, and then write the value back out to the register
> in a multicast manner.  Adding a "multicast RMW" will avoid the need to
> open-code this.
> 
> v2:
>  - Return a u32 to align with the recent change to intel_uncore_rmw.
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 28 ++
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.h |  3 +++
>  2 files changed, 31 insertions(+)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> index a2047a68ea7a..4dc360f4e344 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> @@ -302,6 +302,34 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt 
> *gt, i915_reg_t reg, u32 va
>   intel_uncore_write_fw(gt->uncore, reg, value);
>  }
>  
> +/**
> + * intel_gt_mcr_multicast_rmw - Performs a multicast RMW operation
> + * @gt: GT structure
> + * @reg: the MCR register to read and write
> + * @clear: bits to clear during RMW
> + * @set: bits to set during RMW
> + *
> + * Performs a read-modify-write on an MCR register in a multicast manner.
> + * This operation only makes sense on MCR registers where all instances are
> + * expected to have the same value.  The read will target any non-terminated
> + * instance and the write will be applied to all instances.
> + *
> + * This function assumes the caller is already holding any necessary 
> forcewake
> + * domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should
> + * be obtained automatically.
> + *
> + * Returns the old (unmodified) value read.
> + */
> +u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_reg_t reg,
> +u32 clear, u32 set)
> +{
> + u32 val = intel_gt_mcr_read_any(gt, reg);
> +
> + intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set);
> +
> + return val;
> +}
> +
>  /*
>   * reg_needs_read_steering - determine whether a register read requires
>   * explicit steering
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
> index 77a8b11c287d..781b267478db 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
> @@ -24,6 +24,9 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt,
>  void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
>i915_reg_t reg, u32 value);
>  
> +u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_reg_t reg,
> +u32 clear, u32 set);
> +
>  void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
>i915_reg_t reg,
>u8 *group, u8 *instance);
> -- 
> 2.37.3
>

Re: [PATCH v3 04/14] drm/i915/gt: Correct prefix on a few registers

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> We have a few registers that have existed for several hardware
> generations, but are only used by the driver on Xe_HP and beyond.  In
> cases where the Xe_HP version of the register is now replicated and uses
> multicast behavior, but earlier generations were singleton, let's change
> the register prefix to "XEHP_" to help clarify that we're using the
> newer multicast form of the register.
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  8 
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 +-
>  2 files changed, 9 insertions(+), 9 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 71d8787230c1..890960b56b9e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -486,7 +486,7 @@
>  
>  #define GEN8_RC6_CTX_INFO_MMIO(0x8504)
>  
> -#define GEN12_SQCM   _MMIO(0x8724)
> +#define XEHP_SQCM_MMIO(0x8724)
>  #define   EN_32B_ACCESS  REG_BIT(30)
>  
>  #define HSW_IDICR_MMIO(0x9008)
> @@ -989,7 +989,7 @@
>  #define GEN11_SCRATCH2   _MMIO(0xb140)
>  #define   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE  (1 << 19)
>  
> -#define GEN11_L3SQCREG5  _MMIO(0xb158)
> +#define XEHP_L3SQCREG5   _MMIO(0xb158)
>  #define   L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
>  
>  #define MLTICTXCTL   _MMIO(0xb170)
> @@ -1053,7 +1053,7 @@
>  #define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04)
>  #define XEHP_COMPCTX_TLB_INV_CR  _MMIO(0xcf04)
>  
> -#define GEN12_MERT_MOD_CTRL  _MMIO(0xcf28)
> +#define XEHP_MERT_MOD_CTRL   _MMIO(0xcf28)
>  #define RENDER_MOD_CTRL  _MMIO(0xcf2c)
>  #define COMP_MOD_CTRL_MMIO(0xcf30)
>  #define VDBX_MOD_CTRL_MMIO(0xcf34)
> @@ -1155,7 +1155,7 @@
>  #define EU_PERF_CNTL1_MMIO(0xe558)
>  #define EU_PERF_CNTL5_MMIO(0xe55c)
>  
> -#define GEN12_HDC_CHICKEN0   _MMIO(0xe5f0)
> +#define XEHP_HDC_CHICKEN0_MMIO(0xe5f0)
>  #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 
> 11)
>  #define ICL_HDC_MODE _MMIO(0xe5f4)
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 3056b099dd17..96b9f02a2284 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -569,7 +569,7 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs 
> *engine,
>  struct i915_wa_list *wal)
>  {
>   wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
> - wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
> + wa_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
>REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
>   wa_add(wal,
>  XEHP_FF_MODE2,
> @@ -1514,7 +1514,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>* recommended tuning settings documented in the bspec's
>* performance guide section.
>*/
> - wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
> + wa_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
>  
>   /* Wa_14015795083 */
>   wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
> @@ -2170,7 +2170,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
> struct i915_wa_list *wal)
>* Wa_22010960976:dg2
>* Wa_14013347512:dg2
>*/
> - wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
> + wa_masked_dis(wal, XEHP_HDC_CHICKEN0,
> LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
>   }
>  
> @@ -2223,7 +2223,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
> struct i915_wa_list *wal)
>   if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
>   IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
>   /* Wa_14012362059:dg2 */
> - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
> + wa_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
>   }
>  
>   if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
> @@ -2816,7 +2816,7 @@ general_render_compute_wa_init(struct intel_engine_cs 
> *engine, struct i915_wa_li
>   }
>  
>   /* Wa_14012362059:xehpsdv */
> - wa_write_or(wal, GEN12_MERT_MOD_CTRL,

Re: [PATCH] drm/amd/display: Increase frame size limit for display_mode_vba_util_32.o

2022-10-17 Thread Alex Deucher

Applied.  Thanks!

On Thu, Oct 13, 2022 at 2:25 PM Guenter Roeck  wrote:
>
> Building 32-bit images may fail with the following error.
>
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:
> In function ‘dml32_UseMinimumDCFCLK’:
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:3142:1:
> error: the frame size of 1096 bytes is larger than 1024 bytes
>
> This is seen when building i386:allmodconfig with any of the following
> compilers.
>
> gcc (Debian 12.2.0-3) 12.2.0
> gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
>
> The problem is not seen if the compiler supports GCC_PLUGIN_LATENT_ENTROPY
> because in that case CONFIG_FRAME_WARN is already set to 2048 even for
> 32-bit builds.
>
> dml32_UseMinimumDCFCLK() was introduced with commit dda4fb85e433
> ("drm/amd/display: DML changes for DCN32/321"). It declares a large
> number of local variables. Increase the frame size for the affected
> file to 2048, similar to other files in the same directory, to enable
> 32-bit build tests with affected compilers.
>
> Fixes: dda4fb85e433 ("drm/amd/display: DML changes for DCN32/321")
> Cc: Aurabindo Pillai 
> Reported-by: Łukasz Bartosik 
> Signed-off-by: Guenter Roeck 
> ---
>  drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile 
> b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index d70838edba80..ca7d24000621 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -77,7 +77,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := 
> $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) 
> $(frame_warn_flag)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags)
> -CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := 
> $(dml_ccflags)
> +CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := 
> $(dml_ccflags) $(frame_warn_flag)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
> --
> 2.36.2
>

Re: [PATCH v3 03/14] drm/i915/gt: Drop a few unused register definitions

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Let's drop a few register definitions that are unused anywhere in the
> driver today.  Since the referenced offsets are part of what is now
> considered a multicast register region, the current definitions would
> not be correct for use on any future platform.
> 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 17 -
>  1 file changed, 17 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala

Re: [PATCH v3 02/14] drm/i915/xehp: Create separate reg definitions for new MCR registers

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Starting in Xe_HP, several registers our driver works with have been
> converted from singleton registers into replicated registers with
> multicast behavior.  Although the registers are still located at the
> same MMIO offsets as on previous platforms, let's duplicate the register
> definitions in preparation for upcoming patches that will handle
> multicast registers in a special manner.
> 
> The registers that are now replicated on Xe_HP are:
>  * PAT_INDEX (mslice replication)
>  * FF_MODE2 (gslice replication)
>  * COMMON_SLICE_CHICKEN3 (gslice replication)
>  * SLICE_COMMON_ECO_CHICKEN1 (gslice replication)
>  * SLICE_UNIT_LEVEL_CLKGATE (gslice replication)
>  * LNCFCMOCS (lncf replication)
> 
> Note that there are a couple places in selftest_mocs.c where the
> gen9 version of LNCFCMOCS is still used without regards for which
> platform we're on.  Those cases are just doing an offset lookup and not
> issuing any CPU reads/writes of the register, so the potentially
> multicast nature of the register doesn't come into play.
> 
> v2:
>  - Add commit message note about the unconditional GEN9_LNCFCMOCS usage
>in selftest_mocs.  (Bala)
>  - Include some additional TLB registers.
> 
> Bspec: 66534
> Cc: Balasubramani Vivekanandan 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c|  4 ++--
>  drivers/gpu/drm/i915/gt/intel_gt.c  | 18 --
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 26 +++--
>  drivers/gpu/drm/i915/gt/intel_gtt.c | 22 ++---
>  drivers/gpu/drm/i915/gt/intel_gtt.h |  2 +-
>  drivers/gpu/drm/i915/gt/intel_mocs.c|  5 +++-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +--
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  |  7 --
>  8 files changed, 78 insertions(+), 30 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala

Re: [PATCH v3 01/14] drm/i915/gen8: Create separate reg definitions for new MCR registers

2022-10-17 Thread Balasubramani Vivekanandan

On 14.10.2022 16:02, Matt Roper wrote:
> Gen8 was the first time our hardware had multicast registers (or at
> least the first time the multicast nature was exposed and MMIO accesses
> could be steered).  There are some registers that transitioned from
> singleton behavior to multicast during the gen7 -> gen8 transition;
> let's duplicate the register definitions for those registers in
> preparation for upcoming patches that will handle MCR registers in a
> special manner.
> 
> The registers adjusted are:
>  * MISCCPCTL
>  * SAMPLER_INSTDONE
>  * ROW_INSTDONE
>  * ROW_CHICKEN2
>  * HALF_SLICE_CHICKEN1
>  * HALF_SLICE_CHICKEN3
> 
> v2:
>  - Use the gen8 version of HALF_SLICE_CHICKEN3 in GVT's gen9 engine MMIO
>list.  (Bala)
>  - Update to the gen8 version of MISCCPCTL in a couple new workarounds
>that were recently added for DG2/PVC.  (Bala)
> 
> Signed-off-by: Matt Roper 
> Reviewed-by: Balasubramani Vivekanandan 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  4 +--
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 11 +++-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c   | 26 +--
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.c|  4 +--
>  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c |  2 +-
>  drivers/gpu/drm/i915/gvt/handlers.c   |  2 +-
>  drivers/gpu/drm/i915/gvt/mmio_context.c   |  2 +-
>  drivers/gpu/drm/i915/intel_gvt_mmio_table.c   |  2 +-
>  drivers/gpu/drm/i915/intel_pm.c   |  9 ---
>  9 files changed, 36 insertions(+), 26 deletions(-)

Reviewed-by: Balasubramani Vivekanandan 

Regards,
Bala

[PATCH] drm/amdkfd: Fix type of reset_type parameter in hqd_destroy() callback

2022-10-17 Thread Nathan Chancellor

When booting a kernel compiled with CONFIG_CFI_CLANG on a machine with
an RX 6700 XT, there is a CFI failure in kfd_destroy_mqd_cp():

  [   12.894543] CFI failure at kfd_destroy_mqd_cp+0x2a/0x40 [amdgpu] (target: 
hqd_destroy_v10_3+0x0/0x260 [amdgpu]; expected type: 0x8594d794)

Clang's kernel Control Flow Integrity (kCFI) makes sure that all
indirect call targets have a type that exactly matches the function
pointer prototype. In this case, hqd_destroy()'s third parameter,
reset_type, should have a type of 'uint32_t' but every implementation of
this callback has a third parameter type of 'enum kfd_preempt_type'.

Update the function pointer prototype to match reality so that there is
no more CFI violation.

Link: https://github.com/ClangBuiltLinux/linux/issues/1738
Signed-off-by: Nathan Chancellor 
---

No Fixes tag, as I could not pin down exactly when this started. I
suspect it is

Fixes: 70539bd79500 ("drm/amd: Update MEC HQD loading code for KFD")

but I did not want to add that without a second look. Feel free to add
it during patch application if it makes sense.

 drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index e85364dff4e0..5cb3e8634739 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -262,8 +262,9 @@ struct kfd2kgd_calls {
uint32_t queue_id);
 
int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd,
-   uint32_t reset_type, unsigned int timeout,
-   uint32_t pipe_id, uint32_t queue_id);
+   enum kfd_preempt_type reset_type,
+   unsigned int timeout, uint32_t pipe_id,
+   uint32_t queue_id);
 
bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd);
 

base-commit: 9abf2313adc1ca1b6180c508c25f22f9395cc780
-- 
2.38.0

Re: [Freedreno] [PATCH v3 06/10] drm/msm/dsi: Migrate to drm_dsc_compute_rc_parameters()

2022-10-17 Thread Abhinav Kumar

On 10/17/2022 6:37 AM, Caleb Connolly wrote:

On 17/10/2022 09:59, Marijn Suijten wrote:

On 2022-10-13 09:02:44, Abhinav Kumar wrote:

On 10/13/2022 2:36 AM, Marijn Suijten wrote:

On 2022-10-12 16:03:06, Abhinav Kumar wrote:

[..]
But I would like to hold back this change till Vinod clarifies because
Vinod had mentioned that with drm_dsc_compute_rc_parameters() he was
seeing a mismatch in the computation of two values.

slice_bpg_offset and the final_offset.

Unsurprisingly so because final_offset, and slice_bpg_offset through
initial_offset depend directly on bits_per_pixel. The main takeaway of
this series is that Vinod was interpreting this field as integer instead
of containing 4 fractional bits. If he updates his panel driver [1]
to set bits_per_pixel = 8 << 4 instead of just 8 to account for this,
the values should check out once again.

[1]:
https://git.linaro.org/people/vinod.koul/kernel.git/commit/?h=topic/pixel3_5.18-rc1=1d7d98ad564f1ec69e7525e07418918d90f247a1

Once Vinod (or someone else in possession of a Pixel 3) confirms
this, I can respin this series and more explicitly explain why the FIXME
was put in place, instead of being resolved outright?

- Marijn

Makes perfect sense to me.

Will just wait for Vinod's tested-by.

Unfortunately Vinod doesn't have access to this device anymore, but
Caleb recently sent the support series including display driver for
Pixel 3 and is picking up the testing. User "Newbyte" from #linux-msm
promised to test on the LG G7 to have even more input samples.

Hi,

I'm hoping to pick the Pixel 3 stuff back up at some point, but right now there
seem to be quite a few issues outside of DSC which make testing it a bit of a
pain.

I gave Marijn's series [1] a go but wasn't able to get anything usable out of
the
panel, however I doubt this is a DSC issue as I've always needed some hacks to
get the panel working - I've never had any success with it without skipping both
the initial panel reset and sending the PPS payload.

I think if Marijn has managed to initialise a panel properly then the lack of
Pixel 3 for validation shouldn't be a blocker to merge these fixes.

[1]:
https://lore.kernel.org/linux-arm-msm/20221009184824.457416-1-marijn.suij...@somainline.org/

- Marijn

Alright, the onus is then on Vinod/ users of pixel3 to report/debug
whatever issues arise out of this computation.

Patch itself LGTM, hence

Reviewed-by: Abhinav Kumar

--
Kind Regards,
Caleb

[PATCH v3] drm/amd/display: add an ASSERT() to irq service functions

2022-10-17 Thread Hamza Mahfooz

Currently, if we encounter unimplemented functions, it is difficult to
tell what caused them just by looking at dmesg and that is compounded by
the fact that it is often hard to reproduce said issues, for instance we
have had reports of this condition being triggered when removing a
secondary display that is setup in mirror mode and is connected using
usb-c. So, to have access to more detailed debugging information, add an
ASSERT() to dal_irq_service_ack() and dal_irq_service_set() that only
triggers when we encounter an unimplemented function.

Signed-off-by: Hamza Mahfooz 
---
v2: detail specific instance that I'm interested in and use ASSERT()
instead of WARN().

v3: move ASSERT()s inside the new if blocks.
---
 .../gpu/drm/amd/display/dc/irq/irq_service.c| 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c 
b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 7bad39bba86b..d100edaedbbb 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -112,8 +112,15 @@ bool dal_irq_service_set(
 
dal_irq_service_ack(irq_service, source);
 
-   if (info->funcs && info->funcs->set)
+   if (info->funcs && info->funcs->set) {
+   if (info->funcs->set == dal_irq_service_dummy_set) {
+   DC_LOG_WARNING("%s: src: %d, st: %d\n", __func__,
+  source, enable);
+   ASSERT(0);
+   }
+
return info->funcs->set(irq_service, info, enable);
+   }
 
dal_irq_service_set_generic(irq_service, info, enable);
 
@@ -146,8 +153,14 @@ bool dal_irq_service_ack(
return false;
}
 
-   if (info->funcs && info->funcs->ack)
+   if (info->funcs && info->funcs->ack) {
+   if (info->funcs->ack == dal_irq_service_dummy_ack) {
+   DC_LOG_WARNING("%s: src: %d\n", __func__, source);
+   ASSERT(0);
+   }
+
return info->funcs->ack(irq_service, info);
+   }
 
dal_irq_service_ack_generic(irq_service, info);
 
-- 
2.38.0

Re: [PATCH v2] drm/amd/display: add an ASSERT() to irq service functions

2022-10-17 Thread Harry Wentland




On 2022-10-17 11:38, Hamza Mahfooz wrote:
> Currently, if we encounter unimplemented functions, it is difficult to
> tell what caused them just by looking at dmesg and that is compounded by
> the fact that it is often hard to reproduce said issues, for instance we
> have had reports of this condition being triggered when removing a
> secondary display that is setup in mirror mode and is connected using
> usb-c. So, to have access to more detailed debugging information, add an
> ASSERT() to dal_irq_service_ack() and dal_irq_service_set() that only
> triggers when we encounter an unimplemented function.
> 
> Signed-off-by: Hamza Mahfooz 
> ---
> v2: detail specific instance that I'm interested in and use ASSERT()
> instead of WARN().
> ---
>  .../gpu/drm/amd/display/dc/irq/irq_service.c| 17 +++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c 
> b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> index 7bad39bba86b..3d6ab4fd25cb 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> @@ -112,8 +112,15 @@ bool dal_irq_service_set(
>  
>   dal_irq_service_ack(irq_service, source);
>  
> - if (info->funcs && info->funcs->set)
> + if (info->funcs && info->funcs->set) {
> + if (info->funcs->set == dal_irq_service_dummy_set)
> + DC_LOG_WARNING("%s: src: %d, st: %d\n", __func__,
> +source, enable);
> +
> + ASSERT(info->funcs->set != dal_irq_service_dummy_set);

I think you'll want the ASSERT inside the if block.

Harry

> +
>   return info->funcs->set(irq_service, info, enable);
> + }
>  
>   dal_irq_service_set_generic(irq_service, info, enable);
>  
> @@ -146,8 +153,14 @@ bool dal_irq_service_ack(
>   return false;
>   }
>  
> - if (info->funcs && info->funcs->ack)
> + if (info->funcs && info->funcs->ack) {
> + if (info->funcs->ack == dal_irq_service_dummy_ack)
> + DC_LOG_WARNING("%s: src: %d\n", __func__, source);
> +
> + ASSERT(info->funcs->ack != dal_irq_service_dummy_ack);
> +
>   return info->funcs->ack(irq_service, info);
> + }
>  
>   dal_irq_service_ack_generic(irq_service, info);
>

Re: [PATCH v2 0/2] drm/panfrost: Fix UAPI for C++/BSD compatibility

2022-10-17 Thread Alyssa Rosenzweig

Series is

Reviewed-by: Alyssa Rosenzweig 

Thank you for this, please push to the appropriate trees so we can fix
the Mesa build.

On Mon, Oct 17, 2022 at 11:46:00AM +0100, Steven Price wrote:
> The Panfrost DRM interface to user space is used in Mesa for targets
> other than C/Linux. Specifically the header file needs to compile in C++
> code and for FreeBSD which shares the same UABI.
> 
> The first patch fixes the C++ compilation issue by removing the
> (unnecessary) type name from internal structs which is invalid in C++.
> 
> The second patch technically changes the UABI by changing the header
> values in the dump format to be native endian rather than fixed
> little-endian. Since (a) there are no known big-endian Mali systems, and
> (b) this has only appeared in -rc1, this shouldn't break user space.
> Tools can use the 'magic' field to identify the endianness of the dump
> if they want to support big-endian.
> 
> This is effectively a 'v2' of Adrián's series here [1].
> 
> [1] 
> https://lore.kernel.org/r/20220920211545.1017355-1-adrian.larumbe%40collabora.com
> 
> Steven Price (2):
>   drm/panfrost: Remove type name from internal structs
>   drm/panfrost: replace endian-specific types with native ones
> 
>  drivers/gpu/drm/panfrost/panfrost_dump.c | 36 
>  include/uapi/drm/panfrost_drm.h  | 36 +---
>  2 files changed, 38 insertions(+), 34 deletions(-)
> 
> -- 
> 2.34.1
>

[PATCH v2] drm/amd/display: add an ASSERT() to irq service functions

2022-10-17 Thread Hamza Mahfooz

Currently, if we encounter unimplemented functions, it is difficult to
tell what caused them just by looking at dmesg and that is compounded by
the fact that it is often hard to reproduce said issues, for instance we
have had reports of this condition being triggered when removing a
secondary display that is setup in mirror mode and is connected using
usb-c. So, to have access to more detailed debugging information, add an
ASSERT() to dal_irq_service_ack() and dal_irq_service_set() that only
triggers when we encounter an unimplemented function.

Signed-off-by: Hamza Mahfooz 
---
v2: detail specific instance that I'm interested in and use ASSERT()
instead of WARN().
---
 .../gpu/drm/amd/display/dc/irq/irq_service.c| 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c 
b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 7bad39bba86b..3d6ab4fd25cb 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -112,8 +112,15 @@ bool dal_irq_service_set(
 
dal_irq_service_ack(irq_service, source);
 
-   if (info->funcs && info->funcs->set)
+   if (info->funcs && info->funcs->set) {
+   if (info->funcs->set == dal_irq_service_dummy_set)
+   DC_LOG_WARNING("%s: src: %d, st: %d\n", __func__,
+  source, enable);
+
+   ASSERT(info->funcs->set != dal_irq_service_dummy_set);
+
return info->funcs->set(irq_service, info, enable);
+   }
 
dal_irq_service_set_generic(irq_service, info, enable);
 
@@ -146,8 +153,14 @@ bool dal_irq_service_ack(
return false;
}
 
-   if (info->funcs && info->funcs->ack)
+   if (info->funcs && info->funcs->ack) {
+   if (info->funcs->ack == dal_irq_service_dummy_ack)
+   DC_LOG_WARNING("%s: src: %d\n", __func__, source);
+
+   ASSERT(info->funcs->ack != dal_irq_service_dummy_ack);
+
return info->funcs->ack(irq_service, info);
+   }
 
dal_irq_service_ack_generic(irq_service, info);
 
-- 
2.38.0

Re: [PATCH 2/2] drm/connector: send hotplug uevent on connector cleanup

2022-10-17 Thread Jonas Ådahl

On Mon, Oct 17, 2022 at 03:32:01PM +, Simon Ser wrote:
> A typical DP-MST unplug removes a KMS connector. However care must
> be taken to properly synchronize with user-space. The expected
> sequence of events is the following:
> 
> 1. The kernel notices that the DP-MST port is gone.
> 2. The kernel marks the connector as disconnected, then sends a
>uevent to make user-space re-scan the connector list.
> 3. User-space notices the connector goes from connected to disconnected,
>disables it.
> 4. Kernel handles the IOCTL disabling the connector. On success,
>the very last reference to the struct drm_connector is dropped and
>drm_connector_cleanup() is called.
> 5. The connector is removed from the list, and a uevent is sent to tell
>user-space that the connector disappeared.
> 
> The very last step was missing. As a result, user-space thought the
> connector still existed and could try to disable it again. Since the
> kernel no longer knows about the connector, that would end up with
> EINVAL and confused user-space.
> 
> Fix this by sending a hotplug uevent from drm_connector_cleanup().
> 
> Signed-off-by: Simon Ser 
> Cc: sta...@vger.kernel.org
> Cc: Daniel Vetter 
> Cc: Lyude Paul 
> Cc: Jonas Ådahl 

Tested-by: Jonas Ådahl 


Jonas

[PATCH 2/2] drm/connector: send hotplug uevent on connector cleanup

2022-10-17 Thread Simon Ser

A typical DP-MST unplug removes a KMS connector. However care must
be taken to properly synchronize with user-space. The expected
sequence of events is the following:

1. The kernel notices that the DP-MST port is gone.
2. The kernel marks the connector as disconnected, then sends a
   uevent to make user-space re-scan the connector list.
3. User-space notices the connector goes from connected to disconnected,
   disables it.
4. Kernel handles the IOCTL disabling the connector. On success,
   the very last reference to the struct drm_connector is dropped and
   drm_connector_cleanup() is called.
5. The connector is removed from the list, and a uevent is sent to tell
   user-space that the connector disappeared.

The very last step was missing. As a result, user-space thought the
connector still existed and could try to disable it again. Since the
kernel no longer knows about the connector, that would end up with
EINVAL and confused user-space.

Fix this by sending a hotplug uevent from drm_connector_cleanup().

Signed-off-by: Simon Ser 
Cc: sta...@vger.kernel.org
Cc: Daniel Vetter 
Cc: Lyude Paul 
Cc: Jonas Ådahl 
---
 drivers/gpu/drm/drm_connector.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index e3142c8142b3..90dad87e9ad0 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -582,6 +582,9 @@ void drm_connector_cleanup(struct drm_connector *connector)
mutex_destroy(>mutex);
 
memset(connector, 0, sizeof(*connector));
+
+   if (dev->registered)
+   drm_sysfs_hotplug_event(dev);
 }
 EXPORT_SYMBOL(drm_connector_cleanup);
 
-- 
2.38.0

[PATCH 1/2] Revert "drm: hide unregistered connectors from GETCONNECTOR IOCTL"

2022-10-17 Thread Simon Ser

This reverts commit 981f09295687f856d5345e19c7084aca481c1395.

It turns out this breaks Mutter.

Signed-off-by: Simon Ser 
Cc: Daniel Vetter 
Cc: Lyude Paul 
Cc: Jonas Ådahl 
---
 drivers/gpu/drm/drm_mode_config.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_mode_config.c 
b/drivers/gpu/drm/drm_mode_config.c
index 939d621c9ad4..688c8afe0bf1 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -151,9 +151,6 @@ int drm_mode_getresources(struct drm_device *dev, void 
*data,
count = 0;
connector_id = u64_to_user_ptr(card_res->connector_id_ptr);
drm_for_each_connector_iter(connector, _iter) {
-   if (connector->registration_state != DRM_CONNECTOR_REGISTERED)
-   continue;
-
/* only expose writeback connectors if userspace understands 
them */
if (!file_priv->writeback_connectors &&
(connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK))
-- 
2.38.0

Re: Build regressions/improvements in v6.1-rc1

2022-10-17 Thread Geert Uytterhoeven


On Mon, 17 Oct 2022, Geert Uytterhoeven wrote:

Below is the list of build error/warning regressions/improvements in
v6.1-rc1[1] compared to v6.0[2].

Summarized:
 - build errors: +25/-13



[1] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/9abf2313adc1ca1b6180c508c25f22f9395cc780/
 (all 149 configs)
[2] 
http://kisskb.ellerman.id.au/kisskb/branch/linus/head/4fe89d07dcc2804c8b562f6c7896a45643d34b2f/
 (135 out of 149 configs)


*** ERRORS ***

25 error regressions:
 + /kisskb/src/arch/arm64/include/asm/cputype.h: error: initializer element is not 
constant:  => 44:2


arm64-gcc5/arm64-allmodconfig (arch/arm64/kernel/proton-pack.c:872)


 + /kisskb/src/arch/um/include/asm/processor-generic.h: error: called object is 
not a function or function pointer:  => 94:18
 + /kisskb/src/drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c: error: control 
reaches end of non-void function [-Werror=return-type]:  => 1934:1
 + /kisskb/src/drivers/infiniband/hw/qib/qib_wc_x86_64.c: error: 'X86_VENDOR_AMD' 
undeclared (first use in this function):  => 149:37
 + /kisskb/src/drivers/infiniband/hw/qib/qib_wc_x86_64.c: error: 'struct 
cpuinfo_um' has no member named 'x86_vendor':  => 149:22
 + /kisskb/src/drivers/infiniband/hw/qib/qib_wc_x86_64.c: error: control reaches 
end of non-void function [-Werror=return-type]:  => 150:1
 + /kisskb/src/drivers/infiniband/sw/rdmavt/qp.c: error: 'struct cpuinfo_um' has 
no member named 'x86_cache_size':  => 88:22
 + /kisskb/src/drivers/infiniband/sw/rdmavt/qp.c: error: control reaches end of 
non-void function [-Werror=return-type]:  => 89:1
 + /kisskb/src/drivers/infiniband/sw/rdmavt/qp.c: error: implicit declaration of 
function '__copy_user_nocache' [-Werror=implicit-function-declaration]:  => 
100:2


um-x86_64/um-all{mod,yes}config


 + /kisskb/src/drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c: error: 
array subscript is above array bounds [-Werror=array-bounds]:  => 531:55


arm64-gcc5/arm64-allmodconfig
mipsel-gcc5/mips-allmodconfig
powerpc-gcc5/powerpc-all{mod,yes}config
powerpc-gcc5/ppc32_allmodconfig
powerpc-gcc5/ppc64_book3e_allmodconfig
powerpc-gcc5/ppc64le_allmodconfig
sparc64-gcc5/sparc64-allmodconfig


 + error: modpost: "ebus_dma_enable" [drivers/parport/parport_pc.ko] undefined!:  
=> N/A
 + error: modpost: "ebus_dma_irq_enable" [drivers/parport/parport_pc.ko] 
undefined!:  => N/A
 + error: modpost: "ebus_dma_prepare" [drivers/parport/parport_pc.ko] undefined!:  
=> N/A
 + error: modpost: "ebus_dma_register" [drivers/parport/parport_pc.ko] undefined!: 
 => N/A
 + error: modpost: "ebus_dma_request" [drivers/parport/parport_pc.ko] undefined!:  
=> N/A
 + error: modpost: "ebus_dma_residue" [drivers/parport/parport_pc.ko] undefined!:  
=> N/A
 + error: modpost: "ebus_dma_unregister" [drivers/parport/parport_pc.ko] 
undefined!:  => N/A
 + error: modpost: "ns87303_lock" [drivers/parport/parport_pc.ko] undefined!:  
=> N/A


sparc64-gcc5/sparc-allmodconfig


 + error: modpost: "riscv_cbom_block_size" [arch/riscv/kvm/kvm.ko] undefined!:  
=> N/A


riscv-gcc11/riscv-defconfig
riscv-gcc11/rv32_defconfig


 + {standard input}: Error: branch to a symbol in another ISA mode: 1339 => 
2616, 2621


mips-gcc11/micro32r2_defconfig
mips-gcc11/micro32r2el_defconfig


 + {standard input}: Error: displacement to undefined symbol .L377 overflows 
12-bit field:  => 2286
 + {standard input}: Error: displacement to undefined symbol .L378 overflows 8-bit 
field :  => 2302
 + {standard input}: Error: displacement to undefined symbol .L382 overflows 8-bit 
field :  => 2213
 + {standard input}: Error: pcrel too far:  => 2247, 2261, 2232, 2231, 2262, 
2216, 2204, 2248, 2274, 2293, 2217, 2206, 2221, 2229, 2209, 2249, 2259, 2215
 + {standard input}: Error: unknown pseudo-op: `.l':  => 2305


sh4-gcc11/sh-all{mod,yes}config (ICE in drivers/net/pcs/pcs-xpcs.o)

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

1 2 >

1 - 100 of 160 matches

Mail list logo