[RESEND PATCH v4 2/2] drm/bridge: anx7625: disable regulators when power off

2021-03-31 Thread Hsin-Yi Wang
When suspending the driver, anx7625_power_standby() will be called to
turn off reset-gpios and enable-gpios. However, power supplies are not
disabled. To save power, the driver can get the power supply regulators
and turn them off in anx7625_power_standby().

Signed-off-by: Hsin-Yi Wang 
Reviewed-by: Robert Foss 
Reviewed-by: Xin Ji 
---
 drivers/gpu/drm/bridge/analogix/anx7625.c | 34 +++
 drivers/gpu/drm/bridge/analogix/anx7625.h |  1 +
 2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c 
b/drivers/gpu/drm/bridge/analogix/anx7625.c
index 65cc05982f826..23283ba0c4f93 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -875,12 +876,25 @@ static int sp_tx_edid_read(struct anx7625_data *ctx,
 static void anx7625_power_on(struct anx7625_data *ctx)
 {
struct device *dev = >client->dev;
+   int ret, i;
 
if (!ctx->pdata.low_power_mode) {
DRM_DEV_DEBUG_DRIVER(dev, "not low power mode!\n");
return;
}
 
+   for (i = 0; i < ARRAY_SIZE(ctx->pdata.supplies); i++) {
+   ret = regulator_enable(ctx->pdata.supplies[i].consumer);
+   if (ret < 0) {
+   DRM_DEV_DEBUG_DRIVER(dev, "cannot enable supply %d: 
%d\n",
+i, ret);
+   goto reg_err;
+   }
+   usleep_range(2000, 2100);
+   }
+
+   usleep_range(4000, 4100);
+
/* Power on pin enable */
gpiod_set_value(ctx->pdata.gpio_p_on, 1);
usleep_range(1, 11000);
@@ -889,11 +903,16 @@ static void anx7625_power_on(struct anx7625_data *ctx)
usleep_range(1, 11000);
 
DRM_DEV_DEBUG_DRIVER(dev, "power on !\n");
+   return;
+reg_err:
+   for (--i; i >= 0; i--)
+   regulator_disable(ctx->pdata.supplies[i].consumer);
 }
 
 static void anx7625_power_standby(struct anx7625_data *ctx)
 {
struct device *dev = >client->dev;
+   int ret;
 
if (!ctx->pdata.low_power_mode) {
DRM_DEV_DEBUG_DRIVER(dev, "not low power mode!\n");
@@ -904,6 +923,12 @@ static void anx7625_power_standby(struct anx7625_data *ctx)
usleep_range(1000, 1100);
gpiod_set_value(ctx->pdata.gpio_p_on, 0);
usleep_range(1000, 1100);
+
+   ret = regulator_bulk_disable(ARRAY_SIZE(ctx->pdata.supplies),
+ctx->pdata.supplies);
+   if (ret < 0)
+   DRM_DEV_DEBUG_DRIVER(dev, "cannot disable supplies %d\n", ret);
+
DRM_DEV_DEBUG_DRIVER(dev, "power down\n");
 }
 
@@ -1742,6 +1767,15 @@ static int anx7625_i2c_probe(struct i2c_client *client,
platform->client = client;
i2c_set_clientdata(client, platform);
 
+   pdata->supplies[0].supply = "vdd10";
+   pdata->supplies[1].supply = "vdd18";
+   pdata->supplies[2].supply = "vdd33";
+   ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(pdata->supplies),
+ pdata->supplies);
+   if (ret) {
+   DRM_DEV_ERROR(dev, "fail to get power supplies: %d\n", ret);
+   return ret;
+   }
anx7625_init_gpio(platform);
 
atomic_set(>power_status, 0);
diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h 
b/drivers/gpu/drm/bridge/analogix/anx7625.h
index 193ad86c54503..e4a086b3a3d7b 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.h
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.h
@@ -350,6 +350,7 @@ struct s_edid_data {
 struct anx7625_platform_data {
struct gpio_desc *gpio_p_on;
struct gpio_desc *gpio_reset;
+   struct regulator_bulk_data supplies[3];
struct drm_bridge *panel_bridge;
int intp_irq;
u32 low_power_mode;
-- 
2.31.0.291.g576ba9dcdaf-goog

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[RESEND PATCH v4 1/2] dt-bindings: drm/bridge: anx7625: Add power supplies

2021-03-31 Thread Hsin-Yi Wang
anx7625 requires 3 power supply regulators.

Signed-off-by: Hsin-Yi Wang 
Reviewed-by: Rob Herring 
Reviewed-by: Robert Foss 
---
v3->v4: rebase to drm-misc/for-linux-next
---
 .../bindings/display/bridge/analogix,anx7625.yaml | 15 +++
 1 file changed, 15 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 
b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
index c789784efe306..ab48ab2f4240d 100644
--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
@@ -34,6 +34,15 @@ properties:
 description: used for reset chip control, RESET_N pin B7.
 maxItems: 1
 
+  vdd10-supply:
+description: Regulator that provides the supply 1.0V power.
+
+  vdd18-supply:
+description: Regulator that provides the supply 1.8V power.
+
+  vdd33-supply:
+description: Regulator that provides the supply 3.3V power.
+
   ports:
 $ref: /schemas/graph.yaml#/properties/ports
 
@@ -55,6 +64,9 @@ properties:
 required:
   - compatible
   - reg
+  - vdd10-supply
+  - vdd18-supply
+  - vdd33-supply
   - ports
 
 additionalProperties: false
@@ -72,6 +84,9 @@ examples:
 reg = <0x58>;
 enable-gpios = < 45 GPIO_ACTIVE_HIGH>;
 reset-gpios = < 73 GPIO_ACTIVE_HIGH>;
+vdd10-supply = <_mipibrdg>;
+vdd18-supply = <_mipibrdg>;
+vdd33-supply = <_mipibrdg>;
 
 ports {
 #address-cells = <1>;
-- 
2.31.0.291.g576ba9dcdaf-goog

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v7 3/8] mm/rmap: Split try_to_munlock from try_to_unmap

2021-03-31 Thread Alistair Popple
On Wednesday, 31 March 2021 10:57:46 PM AEDT Jason Gunthorpe wrote:
> On Wed, Mar 31, 2021 at 03:15:47PM +1100, Alistair Popple wrote:
> > On Wednesday, 31 March 2021 2:56:38 PM AEDT John Hubbard wrote:
> > > On 3/30/21 3:56 PM, Alistair Popple wrote:
> > > ...
> > > >> +1 for renaming "munlock*" items to "mlock*", where applicable. good 
> > grief.
> > > > 
> > > > At least the situation was weird enough to prompt further 
investigation :)
> > > > 
> > > > Renaming to mlock* doesn't feel like the right solution to me either 
> > though. I
> > > > am not sure if you saw me responding to myself earlier but I am 
thinking
> > > > renaming try_to_munlock() -> page_mlocked() and try_to_munlock_one() -
>
> > > > page_mlock_one() might be better. Thoughts?
> > > > 
> > > 
> > > Quite confused by this naming idea. Because: try_to_munlock() returns
> > > void, so a boolean-style name such as "page_mlocked()" is already not a
> > > good fit.
> > > 
> > > Even more important, though, is that try_to_munlock() is mlock-ing the
> > > page, right? Is there some subtle point I'm missing? It really is doing
> > > an mlock to the best of my knowledge here. Although the kerneldoc
> > > comment for try_to_munlock() seems questionable too:
> > 
> > It's mlocking the page if it turns out it still needs to be locked after 
> > unlocking it. But I don't think you're missing anything.
> 
> It is really searching all VMA's to see if the VMA flag is set and if
> any are found then it mlocks the page.
> 
> But presenting this routine in its simplified form raises lots of
> questions:
> 
>  - What locking is being used to read the VMA flag?
>  - Why do we need to manipulate global struct page flags under the
>page table locks of a single VMA?

I was wondering that and questioned it in an earlier version of this series. I 
have done some digging and the commit log for b87537d9e2fe ("mm: rmap use pte 
lock not mmap_sem to set PageMlocked") provides the original justification.

It's fairly long so I won't quote it here but the summary seems to be that 
among other things the combination of page lock and ptl makes this safe. I 
have yet to verify if everything there still holds and is sensible, but the 
last paragraph certainly is :-)

"Stopped short of separating try_to_munlock_one() from try_to_munmap_one()
on this occasion, but that's probably the sensible next step - with a
rename, given that try_to_munlock()'s business is to try to set Mlocked."

>  - Why do we need to check for huge pages inside the VMA loop, not
>before going to the rmap? PageTransCompoundHead() is not sensitive to
>the PTEs. (and what happens if the huge page breaks up concurrently?)
>  - Why do we clear the mlock bit then run around to try and set it?

I don't have an answer for that as I'm not (yet) across all the mlock code 
paths, but I'm hoping this patch at least won't change anything.

>Feels racey.
>
> Jason
> 




___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 32/34] drm/amdkfd: multiple gpu migrate vram to vram

2021-03-31 Thread Felix Kuehling
If prefetch range to gpu with actual location is another gpu, or GPU
retry fault restore pages to migrate the range with actual location is
gpu, then migrate from one gpu to another gpu.

Use system memory as bridge because sdma engine may not be able to access
another gpu vram, use sdma of source gpu to migrate to system memory,
then use sdma of destination gpu to migrate from system memory to gpu.

Print out gpuid or gpuidx in debug messages.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 47 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 51 +++-
 3 files changed, 87 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 73701406acb3..d111f88897db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -486,8 +486,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
  * Return:
  * 0 - OK, otherwise error code
  */
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
-   struct mm_struct *mm)
+static int
+svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
 {
unsigned long addr, start, end;
struct vm_area_struct *vma;
@@ -741,6 +742,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm)
return r;
 }
 
+/**
+ * svm_migrate_vram_to_vram - migrate svm range from device to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: process mm, use current->mm if NULL
+ *
+ * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
+struct mm_struct *mm)
+{
+   int r;
+
+   /*
+* TODO: for both devices with PCIe large bar or on same xgmi hive, skip
+* system memory as migration bridge
+*/
+
+   pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
+
+   r = svm_migrate_vram_to_ram(prange, mm);
+   if (r)
+   return r;
+
+   return svm_migrate_ram_to_vram(prange, best_loc, mm);
+}
+
+int
+svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
+{
+   if  (!prange->actual_loc)
+   return svm_migrate_ram_to_vram(prange, best_loc, mm);
+   else
+   return svm_migrate_vram_to_vram(prange, best_loc, mm);
+
+}
+
 /**
  * svm_migrate_to_ram - CPU page fault handler
  * @vmf: CPU vm fault vma, address
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 9949b55d3b6a..bc680619d135 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -37,8 +37,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM
 };
 
-int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc,
-   struct mm_struct *mm);
+int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+   struct mm_struct *mm);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index adb79b10f874..dc0f523ce321 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -347,8 +347,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
kref_put(_bo->kref, svm_range_bo_release);
 }
 
-static bool svm_range_validate_svm_bo(struct svm_range *prange)
+static bool
+svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
 {
+   struct amdgpu_device *bo_adev;
+
mutex_lock(>lock);
if (!prange->svm_bo) {
mutex_unlock(>lock);
@@ -360,6 +363,22 @@ static bool svm_range_validate_svm_bo(struct svm_range 
*prange)
return true;
}
if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+   /*
+* Migrate from GPU to GPU, remove range from source bo_adev
+* svm_bo range list, and return false to allocate svm_bo from
+* destination adev.
+*/
+   bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+   if (bo_adev != adev) {
+   mutex_unlock(>lock);
+
+   spin_lock(>svm_bo->list_lock);
+   list_del_init(>svm_bo_list);
+   spin_unlock(>svm_bo->list_lock);
+
+   

[PATCH 31/34] drm/amdkfd: add svm range validate timestamp

2021-03-31 Thread Felix Kuehling
With xnack on, add validate timestamp in order to handle GPU vm fault
from multiple GPUs.

If GPU retry fault need migrate the range to the best restore location,
use range validate timestamp to record system timestamp after range is
restored to update GPU page table.

Because multiple pages of the same range can have multiple retry faults,
define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING to the long time period that
pending retry faults may still come after the page table update, to skip
duplicate retry faults of the same range.

If the difference between the system timestamp and the range's last
validate timestamp is bigger than AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING,
that means the retry fault is from another GPU, then continue to handle
retry fault recovery.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 ++
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 61bf2df38e72..adb79b10f874 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -34,6 +34,11 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
+/* Long enough to ensure no retry fault comes after svm range is restored and
+ * page table is updated.
+ */
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING   2000
+
 static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -268,6 +273,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>deferred_list);
INIT_LIST_HEAD(>child_list);
atomic_set(>invalid, 0);
+   prange->validate_timestamp = ktime_to_us(ktime_get());
mutex_init(>migrate_mutex);
mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
@@ -1358,6 +1364,9 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
 unreserve_out:
svm_range_unreserve_bos();
 
+   if (!r)
+   prange->validate_timestamp = ktime_to_us(ktime_get());
+
return r;
 }
 
@@ -2098,6 +2107,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
+   uint64_t timestamp;
int32_t best_loc, gpuidx;
int r = 0;
 
@@ -2133,6 +2143,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
 
mutex_lock(>migrate_mutex);
+   timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
+   /* skip duplicate vm fault on different pages of same range */
+   if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+   pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
+svms, prange->start, prange->last);
+   goto out_unlock_range;
+   }
 
best_loc = svm_range_best_restore_location(prange, adev, );
if (best_loc == -1) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index f157be434cfa..af853726b861 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -85,6 +85,7 @@ struct svm_work_list_item {
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
  * @invalid:not 0 means cpu page table is invalidated
+ * @validate_timestamp: system timestamp when range is validated
  * @notifier:   register mmu interval notifier
  * @work_item:  deferred work item information
  * @deferred_list: list header used to add range to deferred list
@@ -121,6 +122,7 @@ struct svm_range {
uint32_tactual_loc;
uint8_t granularity;
atomic_tinvalid;
+   uint64_tvalidate_timestamp;
struct mmu_interval_notifiernotifier;
struct svm_work_list_item   work_item;
struct list_headdeferred_list;
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 30/34] drm/amdkfd: refine migration policy with xnack on

2021-03-31 Thread Felix Kuehling
With xnack on, GPU vm fault handler decide the best restore location,
then migrate range to the best restore location and update GPU mapping
to recover the GPU vm fault.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |   7 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  16 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 136 +--
 5 files changed, 150 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 552c4f656e2d..73701406acb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -479,18 +479,19 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @mm: the process mm structure
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *
  * Return:
  * 0 - OK, otherwise error code
  */
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc)
+int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
 {
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
-   struct mm_struct *mm;
int r = 0;
 
if (prange->actual_loc == best_loc) {
@@ -514,8 +515,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc)
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
 
-   mm = current->mm;
-
for (addr = start; addr < end;) {
unsigned long next;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 95fd7b21791f..9949b55d3b6a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -37,7 +37,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM
 };
 
-int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc);
+int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc,
+   struct mm_struct *mm);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ca1b0c518d46..bce44164f1e3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -864,6 +864,9 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
 
 int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+  struct amdgpu_device *adev, uint32_t *gpuid,
+  uint32_t *gpuidx);
 static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
uint32_t gpuidx, uint32_t *gpuid) {
return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index f897c1d0ea66..1d6310f63ae9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1673,6 +1673,22 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, 
uint32_t gpu_id)
return -EINVAL;
 }
 
+int
+kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+  uint32_t *gpuid, uint32_t *gpuidx)
+{
+   struct kgd_dev *kgd = (struct kgd_dev *)adev;
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++)
+   if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
+   *gpuid = p->pdds[i]->dev->id;
+   *gpuidx = i;
+   return 0;
+   }
+   return -EINVAL;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
int ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 77da6c68fab2..61bf2df38e72 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1283,6 +1283,24 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
if (gpuidx < MAX_GPU_INSTANCE) {
bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
bitmap_set(ctx.bitmap, gpuidx, 1);
+   } else if (ctx.process->xnack_enabled) {
+   bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+
+   

[PATCH 34/34] drm/amdkfd: Add CONFIG_HSA_AMD_SVM

2021-03-31 Thread Felix Kuehling
Control whether to build SVM support into amdgpu with a Kconfig option.
This makes it easier to disable it in production kernels if this new
feature causes problems in production environments.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/Kconfig   | 15 +++-
 drivers/gpu/drm/amd/amdkfd/Makefile  |  9 ---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  7 ++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 17 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 30 
 5 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig 
b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 7880fc101a3b..d03a79e14126 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -8,8 +8,21 @@ config HSA_AMD
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select HMM_MIRROR
-   select DEVICE_PRIVATE
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
help
  Enable this if you want to use HSA features on AMD GPU devices.
+
+config HSA_AMD_SVM
+   bool "Enable HMM-based shared virtual memory manager"
+   depends on HSA_AMD
+   default y
+   select HMM_MIRROR
+   select MMU_NOTIFIER
+   select DEVICE_PRIVATE
+   help
+ Enable this to use unified memory and managed memory in HIP. This
+ memory manager supports two modes of operation. One based on
+ preemptions and one based on page faults. To enable page fault
+ based memory management on most GFXv9 GPUs, set the module
+ parameter amdgpu.noretry=0.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index a93301dbc464..c4f3aff11072 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -54,9 +54,7 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgdev.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
-   $(AMDKFD_PATH)/kfd_crat.o \
-   $(AMDKFD_PATH)/kfd_svm.o \
-   $(AMDKFD_PATH)/kfd_migrate.o
+   $(AMDKFD_PATH)/kfd_crat.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
@@ -65,3 +63,8 @@ endif
 ifneq ($(CONFIG_DEBUG_FS),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
 endif
+
+ifneq ($(CONFIG_HSA_AMD_SVM),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
+   $(AMDKFD_PATH)/kfd_migrate.o
+endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9838d0cd1f51..f60c44dbae3e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1768,6 +1768,7 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep,
return r;
 }
 
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
struct kfd_ioctl_svm_args *args = data;
@@ -1793,6 +1794,12 @@ static int kfd_ioctl_svm(struct file *filep, struct 
kfd_process *p, void *data)
 
return r;
 }
+#else
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+   return -EPERM;
+}
+#endif
 
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index bc680619d135..9119b75b3853 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -24,6 +24,8 @@
 #ifndef KFD_MIGRATE_H_
 #define KFD_MIGRATE_H_
 
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
 #include 
 #include 
 #include 
@@ -43,17 +45,20 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
 
-#if defined(CONFIG_DEVICE_PRIVATE)
 int svm_migrate_init(struct amdgpu_device *adev);
 void svm_migrate_fini(struct amdgpu_device *adev);
 
 #else
+
 static inline int svm_migrate_init(struct amdgpu_device *adev)
 {
-   DRM_WARN_ONCE("DEVICE_PRIVATE kernel config option is not enabled, "
- "add CONFIG_DEVICE_PRIVATE=y in config file to fix\n");
-   return -ENODEV;
+   return 0;
+}
+static inline void svm_migrate_fini(struct amdgpu_device *adev)
+{
+   /* empty */
 }
-static inline void svm_migrate_fini(struct amdgpu_device *adev) {}
-#endif
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
 #endif /* KFD_MIGRATE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index af853726b861..363c282f8747 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -24,6 +24,8 @@
 #ifndef KFD_SVM_H_
 #define KFD_SVM_H_
 
+#if 

[PATCH 33/34] drm/amdkfd: Add SVM API support capability bits

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

SVMAPISupported property added to HSA_CAPABILITY, the value match
HSA_CAPABILITY defined in Thunk spec:

SVMAPISupported: it will not be supported on older kernels that don't
have HMM or on systems with GFXv8 or older GPUs without support for
48-bit virtual addresses.

CoherentHostAccess property added to HSA_MEMORYPROPERTY, the value match
HSA_MEMORYPROPERTY defined in Thunk spec:

CoherentHostAccess: whether or not device memory can be coherently
accessed by the host CPU.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 10 ++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index cdef608db4f4..ab9fe854b4d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1419,6 +1419,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.capability |= (adev->ras_features != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
 
+   /* SVM API and HMM page migration work together, device memory type
+* is initialized to non-zero when page migration registers device memory.
+*/
+   if (adev->kfd.dev->pgmap.type != 0)
+   dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
+
kfd_debug_print_topology();
 
if (!res)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index b8b68087bd7a..6bd6380b0ee0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -53,8 +53,9 @@
 #define HSA_CAP_ASIC_REVISION_MASK 0x03c0
 #define HSA_CAP_ASIC_REVISION_SHIFT22
 #define HSA_CAP_SRAM_EDCSUPPORTED  0x0400
+#define HSA_CAP_SVMAPI_SUPPORTED   0x0800
 
-#define HSA_CAP_RESERVED   0xf80f8000
+#define HSA_CAP_RESERVED   0xf00f8000
 
 struct kfd_node_properties {
uint64_t hive_id;
@@ -98,9 +99,10 @@ struct kfd_node_properties {
 #define HSA_MEM_HEAP_TYPE_GPU_LDS  4
 #define HSA_MEM_HEAP_TYPE_GPU_SCRATCH  5
 
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE0x0001
-#define HSA_MEM_FLAGS_NON_VOLATILE 0x0002
-#define HSA_MEM_FLAGS_RESERVED 0xfffc
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE0x0001
+#define HSA_MEM_FLAGS_NON_VOLATILE 0x0002
+#define HSA_MEM_FLAGS_COHERENTHOSTACCESS   0x0004
+#define HSA_MEM_FLAGS_RESERVED 0xfff8
 
 struct kfd_mem_properties {
struct list_headlist;
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 29/34] drm/amdgpu: reserve fence slot to update page table

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Forgot to reserve a fence slot to use sdma to update page table, cause
below kernel BUG backtrace to handle vm retry fault while application is
exiting.

[  133.048143] kernel BUG at 
/home/yangp/git/compute_staging/kernel/drivers/dma-buf/dma-resv.c:281!
[  133.048487] Workqueue: events amdgpu_irq_handle_ih1 [amdgpu]
[  133.048506] RIP: 0010:dma_resv_add_shared_fence+0x204/0x280
[  133.048672]  amdgpu_vm_sdma_commit+0x134/0x220 [amdgpu]
[  133.048788]  amdgpu_vm_bo_update_range+0x220/0x250 [amdgpu]
[  133.048905]  amdgpu_vm_handle_fault+0x202/0x370 [amdgpu]
[  133.049031]  gmc_v9_0_process_interrupt+0x1ab/0x310 [amdgpu]
[  133.049165]  ? kgd2kfd_interrupt+0x9a/0x180 [amdgpu]
[  133.049289]  ? amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049408]  amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049534]  amdgpu_ih_process+0x9b/0x1c0 [amdgpu]
[  133.049657]  amdgpu_irq_handle_ih1+0x21/0x60 [amdgpu]
[  133.049669]  process_one_work+0x29f/0x640
[  133.049678]  worker_thread+0x39/0x3f0
[  133.049685]  ? process_one_work+0x640/0x640

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 83c020411e52..7f696f5c55a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3302,7 +3302,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
struct amdgpu_bo *root;
uint64_t value, flags;
struct amdgpu_vm *vm;
-   long r;
+   int r;
bool is_compute_context = false;
 
spin_lock(>vm_manager.pasid_lock);
@@ -3360,6 +3360,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
value = 0;
}
 
+   r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
+   if (r) {
+   pr_debug("failed %d to reserve fence slot\n", r);
+   goto error_unlock;
+   }
+
r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
addr, flags, value, NULL, NULL,
NULL);
@@ -3371,7 +3377,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
 error_unlock:
amdgpu_bo_unreserve(root);
if (r < 0)
-   DRM_ERROR("Can't handle page fault (%ld)\n", r);
+   DRM_ERROR("Can't handle page fault (%d)\n", r);
 
 error_unref:
amdgpu_bo_unref();
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 28/34] drm/amdgpu: add svm_bo eviction to enable_signal cb

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Add to amdgpu_amdkfd_fence.enable_signal callback, support
for svm_bo fence eviction.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 53559643c712..1fe233cddb20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
 
 static const struct dma_fence_ops amdkfd_fence_ops;
 static atomic_t fence_seq = ATOMIC_INIT(0);
@@ -123,9 +124,13 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence 
*f)
if (dma_fence_is_signaled(f))
return true;
 
-   if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
-   return true;
-
+   if (!fence->svm_bo) {
+   if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+   return true;
+   } else {
+   if (!svm_range_schedule_evict_svm_bo(fence))
+   return true;
+   }
return false;
 }
 
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 26/34] drm/amdkfd: add svm_bo eviction mechanism support

2021-03-31 Thread Felix Kuehling
The svm_bo eviction mechanism is different from that of regular BOs.
Every SVM_BO created contains one eviction fence and one
worker item for the eviction process.
SVM_BOs can be attached to one or more pranges.
For the SVM_BO eviction mechanism, TTM will start to call
the enable_signal callback for every SVM_BO until VRAM space
is available.
Here, all the ttm_evict calls are synchronous; this guarantees
that each eviction has completed and the fence has signaled before
it returns.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 201 +--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  13 +-
 2 files changed, 168 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7b2c97b7f48c..77da6c68fab2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -34,6 +34,7 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
+static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
@@ -319,7 +320,15 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(_bo->list_lock);
}
spin_unlock(_bo->list_lock);
-
+   if (!dma_fence_is_signaled(_bo->eviction_fence->base)) {
+   /* We're not in the eviction worker.
+* Signal the fence and synchronize with any
+* pending eviction work.
+*/
+   dma_fence_signal(_bo->eviction_fence->base);
+   cancel_work_sync(_bo->eviction_work);
+   }
+   dma_fence_put(_bo->eviction_fence->base);
amdgpu_bo_unref(_bo->bo);
kfree(svm_bo);
 }
@@ -332,6 +341,61 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
kref_put(_bo->kref, svm_range_bo_release);
 }
 
+static bool svm_range_validate_svm_bo(struct svm_range *prange)
+{
+   mutex_lock(>lock);
+   if (!prange->svm_bo) {
+   mutex_unlock(>lock);
+   return false;
+   }
+   if (prange->ttm_res) {
+   /* We still have a reference, all is well */
+   mutex_unlock(>lock);
+   return true;
+   }
+   if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+   if (READ_ONCE(prange->svm_bo->evicting)) {
+   struct dma_fence *f;
+   struct svm_range_bo *svm_bo;
+   /* The BO is getting evicted,
+* we need to get a new one
+*/
+   mutex_unlock(>lock);
+   svm_bo = prange->svm_bo;
+   f = dma_fence_get(_bo->eviction_fence->base);
+   svm_range_bo_unref(prange->svm_bo);
+   /* wait for the fence to avoid long spin-loop
+* at list_empty_careful
+*/
+   dma_fence_wait(f, false);
+   dma_fence_put(f);
+   } else {
+   /* The BO was still around and we got
+* a new reference to it
+*/
+   mutex_unlock(>lock);
+   pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+
+   prange->ttm_res = >svm_bo->bo->tbo.mem;
+   return true;
+   }
+
+   } else {
+   mutex_unlock(>lock);
+   }
+
+   /* We need a new svm_bo. Spin-loop to wait for concurrent
+* svm_range_bo_release to finish removing this range from
+* its range list. After this, it is safe to reuse the
+* svm_bo pointer and svm_bo_list head.
+*/
+   while (!list_empty_careful(>svm_bo_list))
+   ;
+
+   return false;
+}
+
 static struct svm_range_bo *svm_range_bo_new(void)
 {
struct svm_range_bo *svm_bo;
@@ -351,72 +415,56 @@ int
 svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bool clear)
 {
-   struct amdkfd_process_info *process_info;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
struct amdgpu_bo *bo;
struct kfd_process *p;
+   struct mm_struct *mm;
int r;
 
-   pr_debug("[0x%lx 0x%lx]\n", prange->start, prange->last);
-   mutex_lock(>lock);
-   if (prange->svm_bo) {
-   if (prange->ttm_res) {
-   /* We still have a reference, all is well */
-   mutex_unlock(>lock);
-   return 0;
-   }
-   if (svm_bo_ref_unless_zero(prange->svm_bo)) {
-

[PATCH 27/34] drm/amdgpu: svm bo enable_signal call condition

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

[why]
To support svm bo eviction mechanism.

[how]
If the BO created has the AMDGPU_AMDKFD_CREATE_SVM_BO flag set,
enable_signal callback will be called inside amdgpu_evict_flags.
This also causes gutting of the BO by removing all placements,
so that TTM won't actually do an eviction. Instead it will discard
the memory held by the BO. This is needed for HMM migration to user
mode system memory pages.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a2585058e65d..17e0f3b60c18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -111,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object 
*bo,
}
 
abo = ttm_to_amdgpu_bo(bo);
+   if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+   struct dma_fence *fence;
+   struct dma_resv *resv = >base._resv;
+
+   rcu_read_lock();
+   fence = rcu_dereference(resv->fence_excl);
+   if (fence && !fence->ops->signaled)
+   dma_fence_enable_sw_signaling(fence);
+
+   placement->num_placement = 0;
+   placement->num_busy_placement = 0;
+   rcu_read_unlock();
+   return;
+   }
switch (bo->mem.mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 25/34] drm/amdgpu: add param bit flag to create SVM BOs

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Add CREATE_SVM_BO define bit for SVM BOs.
Another define flag was moved to concentrate these
KFD type flags in one include file.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h   | 4 
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9af644f256e9..bc38de8c5c38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -33,9 +33,6 @@
 #include 
 #include "amdgpu_xgmi.h"
 
-/* BO flag to indicate a KFD userptr BO */
-#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
-
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
@@ -217,7 +214,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo 
*bo)
u32 domain = bo->preferred_domains;
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
 
-   if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+   if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
domain = AMDGPU_GEM_DOMAIN_CPU;
sg = false;
}
@@ -1278,7 +1275,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
-   bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
+   bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
 
(*mem)->va = va;
(*mem)->domain = domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 25411b2c4dd9..b07903d317e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -37,6 +37,10 @@
 #define AMDGPU_BO_INVALID_OFFSET   LONG_MAX
 #define AMDGPU_BO_MAX_PLACEMENTS   3
 
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO(1ULL << 63)
+#define AMDGPU_AMDKFD_CREATE_SVM_BO(1ULL << 62)
+
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 
 struct amdgpu_bo_param {
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 24/34] drm/amdkfd: add svm_bo reference for eviction fence

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

[why]
As part of the SVM functionality, the eviction mechanism used for
SVM_BOs is different. This mechanism uses one eviction fence per prange,
instead of one fence per kfd_process.

[how]
Add a svm_bo reference to amdgpu_amdkfd_fence to allow differentiating between
SVM_BO and regular BO evictions. This also includes modifications to set the
reference at the fence creation call.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 --
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 14f68c028126..beb2ef070a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
+   struct svm_range_bo *svm_bo;
 };
 
 struct amdgpu_kfd_dev {
@@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct 
amdgpu_device *adev,
int queue_bit);
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-   struct 
mm_struct *mm);
+   struct mm_struct *mm,
+   struct svm_range_bo *svm_bo);
 #if IS_ENABLED(CONFIG_HSA_AMD)
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 5af464933976..53559643c712 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -60,7 +60,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
  */
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-  struct mm_struct *mm)
+   struct mm_struct *mm,
+   struct svm_range_bo *svm_bo)
 {
struct amdgpu_amdkfd_fence *fence;
 
@@ -73,7 +74,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 
context,
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
spin_lock_init(>lock);
-
+   fence->svm_bo = svm_bo;
dma_fence_init(>base, _fence_ops, >lock,
   context, atomic_inc_return(_seq));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e93850f2f3b1..9af644f256e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -970,7 +970,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void 
**process_info,
 
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
-  current->mm);
+  current->mm,
+  NULL);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
@@ -2188,7 +2189,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence **ef)
 */
new_fence = amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
-   process_info->eviction_fence->mm);
+   process_info->eviction_fence->mm,
+   NULL);
if (!new_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 22/34] drm/amdkfd: page table restore through svm API

2021-03-31 Thread Felix Kuehling
Implement page table restore in the SVM API. This is called from
the fault handler at amdgpu_vm, to update page tables through
the page fault retry IH.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 59 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 +
 2 files changed, 61 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7da58a4214c0..7b2c97b7f48c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1964,6 +1964,65 @@ svm_range_from_addr(struct svm_range_list *svms, 
unsigned long addr,
return NULL;
 }
 
+int
+svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr)
+{
+   int r = 0;
+   struct mm_struct *mm = NULL;
+   struct svm_range *prange;
+   struct svm_range_list *svms;
+   struct kfd_process *p;
+
+   p = kfd_lookup_process_by_pasid(pasid);
+   if (!p) {
+   pr_debug("kfd process not founded pasid 0x%x\n", pasid);
+   return -ESRCH;
+   }
+   if (!p->xnack_enabled) {
+   pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+   return -EFAULT;
+   }
+   svms = >svms;
+
+   pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+
+   mm = get_task_mm(p->lead_thread);
+   if (!mm) {
+   pr_debug("svms 0x%p failed to get mm\n", svms);
+   r = -ESRCH;
+   goto out;
+   }
+
+   mmap_read_lock(mm);
+   mutex_lock(>lock);
+   prange = svm_range_from_addr(svms, addr, NULL);
+
+   if (!prange) {
+   pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
+svms, addr);
+   r = -EFAULT;
+   goto out_unlock_svms;
+   }
+
+   mutex_lock(>migrate_mutex);
+
+   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, 
false);
+   if (r)
+   pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r,
+svms, prange->start, prange->last);
+
+   mutex_unlock(>migrate_mutex);
+out_unlock_svms:
+   mutex_unlock(>lock);
+   mmap_read_unlock(mm);
+   mmput(mm);
+out:
+   kfd_unref_process(p);
+
+   return r;
+}
+
 void svm_range_list_fini(struct kfd_process *p)
 {
struct svm_range *prange;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 6cc12de8d76a..168c623f4477 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -154,6 +154,8 @@ void svm_range_vram_node_free(struct svm_range *prange);
 int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
   unsigned long addr, struct svm_range *parent,
   struct svm_range *prange);
+int svm_range_restore_pages(struct amdgpu_device *adev,
+   unsigned int pasid, uint64_t addr);
 void svm_range_add_list_work(struct svm_range_list *svms,
 struct svm_range *prange, struct mm_struct *mm,
 enum svm_work_list_ops op);
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 23/34] drm/amdkfd: SVM API call to restore page tables

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Use SVM API to restore page tables when retry fault and
compute context are enabled.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7e306fd20de4..83c020411e52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,6 +37,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "kfd_svm.h"
 
 /**
  * DOC: GPUVM
@@ -3302,18 +3303,29 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
uint64_t value, flags;
struct amdgpu_vm *vm;
long r;
+   bool is_compute_context = false;
 
spin_lock(>vm_manager.pasid_lock);
vm = idr_find(>vm_manager.pasid_idr, pasid);
-   if (vm)
+   if (vm) {
root = amdgpu_bo_ref(vm->root.base.bo);
-   else
+   is_compute_context = vm->is_compute_context;
+   } else {
root = NULL;
+   }
spin_unlock(>vm_manager.pasid_lock);
 
if (!root)
return false;
 
+   addr /= AMDGPU_GPU_PAGE_SIZE;
+
+   if (is_compute_context &&
+   !svm_range_restore_pages(adev, pasid, addr)) {
+   amdgpu_bo_unref();
+   return true;
+   }
+
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
@@ -3327,18 +3339,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
if (!vm)
goto error_unlock;
 
-   addr /= AMDGPU_GPU_PAGE_SIZE;
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
 
-   if (vm->is_compute_context) {
+   if (is_compute_context) {
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
AMDGPU_PTE_TF;
value = 0;
-
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 20/34] drm/amdkfd: invalidate tables on page retry fault

2021-03-31 Thread Felix Kuehling
GPU page tables are invalidated by unmapping the prange directly at
the mmu notifier, when page fault retry is enabled through the
amdgpu_noretry global parameter. The page table restore is
performed in the page fault handler.

If xnack is on, we update GPU mappings after migration to avoid
unnecessary GPUVM faults.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 79 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  4 +-
 3 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 81bae0adc0cf..552c4f656e2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -807,7 +807,11 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
 prange, prange->start, prange->last);
 
-   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+   if (p->xnack_enabled && parent == prange)
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
+   else
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
svm_range_add_list_work(>svms, parent, mm, op);
schedule_deferred_list_work(>svms);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a83a7242c760..7da58a4214c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -912,6 +912,13 @@ svm_range_split_by_granularity(struct kfd_process *p, 
struct mm_struct *mm,
svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
}
 
+   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+   if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
+   prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
+   pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
+prange, prange->start, prange->last,
+SVM_OP_ADD_RANGE_AND_MAP);
+   }
return 0;
 }
 
@@ -1418,25 +1425,54 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
unsigned long start, unsigned long last)
 {
struct svm_range_list *svms = prange->svms;
-   int invalid, evicted_ranges;
+   struct kfd_process *p;
int r = 0;
 
-   invalid = atomic_inc_return(>invalid);
-   evicted_ranges = atomic_inc_return(>evicted_ranges);
-   if (evicted_ranges != 1)
-   return r;
+   p = container_of(svms, struct kfd_process, svms);
 
-   pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
-prange->svms, prange->start, prange->last);
+   pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+svms, prange->start, prange->last, start, last);
 
-   /* First eviction, stop the queues */
-   r = kgd2kfd_quiesce_mm(mm);
-   if (r)
-   pr_debug("failed to quiesce KFD\n");
+   if (!p->xnack_enabled) {
+   int invalid, evicted_ranges;
 
-   pr_debug("schedule to restore svm %p ranges\n", svms);
-   schedule_delayed_work(>restore_work,
-   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+   invalid = atomic_inc_return(>invalid);
+   evicted_ranges = atomic_inc_return(>evicted_ranges);
+   if (evicted_ranges != 1)
+   return r;
+
+   pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+
+   /* First eviction, stop the queues */
+   r = kgd2kfd_quiesce_mm(mm);
+   if (r)
+   pr_debug("failed to quiesce KFD\n");
+
+   pr_debug("schedule to restore svm %p ranges\n", svms);
+   schedule_delayed_work(>restore_work,
+   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+   } else {
+   struct svm_range *pchild;
+   unsigned long s, l;
+
+   pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
+prange->svms, start, last);
+   svm_range_lock(prange);
+   list_for_each_entry(pchild, >child_list, child_list) {
+   mutex_lock_nested(>lock, 1);
+   s = max(start, pchild->start);
+   l = min(last, pchild->last);
+   if (l >= s)
+   svm_range_unmap_from_gpus(pchild, s, l);
+   mutex_unlock(>lock);
+   }
+   s = max(start, prange->start);
+   l = min(last, prange->last);
+   if 

[PATCH 21/34] drm/amdgpu: enable 48-bit IH timestamp counter

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

By default this timestamp is a 32-bit counter. It
overflows in around 10 minutes.

Change-Id: I7c46604b0272dcfd1ce24351437c16fe53dca0ab
Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index ca8efa5c6978..2f17c8a57015 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct 
amdgpu_device *adev,
 
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+   tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
/* enable_intr field is only valid in ring0 */
if (ih == >irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 
0));
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 19/34] drm/amdkfd: HMM migrate vram to ram

2021-03-31 Thread Felix Kuehling
If a CPU page fault happens, the HMM pgmap_ops callback migrate_to_ram
starts migrating memory from vram to ram in these steps:

1. migrate_vma_pages get vram pages, and notify HMM to invalidate the
pages, HMM interval notifier callback evict process queues
2. Allocate system memory pages
3. Use svm copy memory to migrate data from vram to ram
4. migrate_vma_pages copy pages structure from vram pages to ram pages
5. Return VM_FAULT_SIGBUS if migration failed, to notify application
6. migrate_vma_finalize put vram pages, page_free callback free vram
pages and vram nodes
7. Restore work wait for migration is finished, then update GPU page
table mapping to system memory, and resume process queues

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 300 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 123 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  10 +
 4 files changed, 426 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index fcaf34096820..81bae0adc0cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -259,6 +259,35 @@ svm_migrate_put_vram_page(struct amdgpu_device *adev, 
unsigned long addr)
put_page(page);
 }
 
+static unsigned long
+svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
+{
+   unsigned long addr;
+
+   addr = page_to_pfn(page) << PAGE_SHIFT;
+   return (addr - adev->kfd.dev->pgmap.range.start);
+}
+
+static struct page *
+svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
+{
+   struct page *page;
+
+   page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+   if (page)
+   lock_page(page);
+
+   return page;
+}
+
+void svm_migrate_put_sys_page(unsigned long addr)
+{
+   struct page *page;
+
+   page = pfn_to_page(addr >> PAGE_SHIFT);
+   unlock_page(page);
+   put_page(page);
+}
 
 static int
 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
@@ -511,13 +540,213 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc)
 
 static void svm_migrate_page_free(struct page *page)
 {
+   /* Keep this function to avoid warning */
+}
+
+static int
+svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+   struct migrate_vma *migrate, struct dma_fence **mfence,
+   dma_addr_t *scratch)
+{
+   uint64_t npages = migrate->cpages;
+   struct device *dev = adev->dev;
+   uint64_t *src;
+   dma_addr_t *dst;
+   struct page *dpage;
+   uint64_t i = 0, j;
+   uint64_t addr;
+   int r = 0;
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last);
+
+   addr = prange->start << PAGE_SHIFT;
+
+   src = (uint64_t *)(scratch + npages);
+   dst = scratch;
+
+   for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+   struct page *spage;
+
+   spage = migrate_pfn_to_page(migrate->src[i]);
+   if (!spage) {
+   pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+   r = -ENOMEM;
+   goto out_oom;
+   }
+   src[i] = svm_migrate_addr(adev, spage);
+   if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+   r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+src + i - j, j,
+FROM_VRAM_TO_RAM,
+mfence);
+   if (r)
+   goto out_oom;
+   j = 0;
+   }
+
+   dpage = svm_migrate_get_sys_page(migrate->vma, addr);
+   if (!dpage) {
+   pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+   r = -ENOMEM;
+   goto out_oom;
+   }
+
+   dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, 
DMA_FROM_DEVICE);
+   r = dma_mapping_error(dev, dst[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   goto out_oom;
+   }
+
+   pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
+ dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
+
+   migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
+   migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+   }
+
+   r = 

[PATCH 17/34] drm/amdkfd: copy memory through gart table

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Use sdma linear copy to migrate data between ram and vram. The sdma
linear copy command uses kernel buffer function queue to access system
memory through gart table.

Use reserved gart table window 0 to map system page address, and vram
page address is direct mapping. Use the same kernel buffer function to
fill in gart table mapping, so this is serialized with memory copy by
sdma job submission. We only need to wait for the last memory copy sdma fence
for larger buffer migrations.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 172 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   5 +
 2 files changed, 177 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 4bb39c562665..2a6824ddae88 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -32,6 +32,178 @@
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
 
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+   return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+   struct amdgpu_device *adev = ring->adev;
+   struct amdgpu_job *job;
+   unsigned int num_dw, num_bytes;
+   struct dma_fence *fence;
+   uint64_t src_addr, dst_addr;
+   uint64_t pte_flags;
+   void *cpu_addr;
+   int r;
+
+   /* use gart window 0 */
+   *gart_addr = adev->gmc.gart_start;
+
+   num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+   num_bytes = npages * 8;
+
+   r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+AMDGPU_IB_POOL_DELAYED, );
+   if (r)
+   return r;
+
+   src_addr = num_dw * 4;
+   src_addr += job->ibs[0].gpu_addr;
+
+   dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+   amdgpu_emit_copy_buffer(adev, >ibs[0], src_addr,
+   dst_addr, num_bytes, false);
+
+   amdgpu_ring_pad_ib(ring, >ibs[0]);
+   WARN_ON(job->ibs[0].length_dw > num_dw);
+
+   pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+   pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+   if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+   pte_flags |= AMDGPU_PTE_WRITEABLE;
+   pte_flags |= adev->gart.gart_pte_flags;
+
+   cpu_addr = >ibs[0].ptr[num_dw];
+
+   r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+   if (r)
+   goto error_free;
+
+   r = amdgpu_job_submit(job, >mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, );
+   if (r)
+   goto error_free;
+
+   dma_fence_put(fence);
+
+   return r;
+
+error_free:
+   amdgpu_job_free(job);
+   return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
+ *
+ * @adev: amdgpu device the sdma ring running
+ * @src: source page address array
+ * @dst: destination page address array
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence to signal after sdma is done
+ *
+ * ram address uses GART table continuous entries mapping to ram pages,
+ * vram address uses direct mapping of vram pages, which must have npages
+ * number of continuous pages.
+ * GART update and sdma use the same buffer copy function ring; sdma is split into
+ * multiple GTT_MAX_PAGES transfers. All sdma operations are serialized; wait for
+ * the last sdma finish fence, which is returned to check that the memory copy is done.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+uint64_t *vram, uint64_t npages,
+enum MIGRATION_COPY_DIR direction,
+struct dma_fence **mfence)
+{
+   const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+   struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+   uint64_t gart_s, gart_d;
+   struct dma_fence *next;
+   uint64_t size;
+   int r;
+
+   mutex_lock(>mman.gtt_window_lock);
+
+   while (npages) {
+   size = min(GTT_MAX_PAGES, npages);
+
+   if (direction == FROM_VRAM_TO_RAM) {
+   gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+   r = svm_migrate_gart_map(ring, size, sys, _d, 0);
+
+   } else if (direction == FROM_RAM_TO_VRAM) {
+   r = svm_migrate_gart_map(ring, size, sys, _s,
+KFD_IOCTL_SVM_FLAG_GPU_RO);
+   gart_d = 

[PATCH 18/34] drm/amdkfd: HMM migrate ram to vram

2021-03-31 Thread Felix Kuehling
Registering an svm range with the same address and size, but with
preferred_location changed from CPU to GPU or from GPU to CPU, triggers
migration of the svm range from ram to vram or from vram to ram.

If svm range prefetch location is GPU with flags
KFD_IOCTL_SVM_FLAG_HOST_ACCESS, validate the svm range on ram first,
then migrate it from ram to vram.

After migrating to vram is done, CPU access will have cpu page fault,
page fault handler migrate it back to ram and resume cpu access.

Migration steps:

1. migrate_vma_pages get svm range ram pages, notify the
interval is invalidated and unmap from CPU page table, HMM interval
notifier callback evict process queues
2. Allocate new pages in vram using TTM
3. Use svm copy memory to sdma copy data from ram to vram
4. migrate_vma_pages copy ram pages structure to vram pages structure
5. migrate_vma_finalize put ram pages to free ram pages and memory
6. Restore work wait for migration is finished, then update GPUs page
table mapping to new vram pages, resume process queues

If migrate_vma_setup failed to collect all ram pages of range, retry 3
times until success to start migration.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 305 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 197 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +
 4 files changed, 500 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 2a6824ddae88..fcaf34096820 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -204,6 +204,311 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct 
dma_fence *mfence)
return r;
 }
 
+static uint64_t
+svm_migrate_node_physical_addr(struct amdgpu_device *adev,
+  struct drm_mm_node **mm_node, uint64_t *offset)
+{
+   struct drm_mm_node *node = *mm_node;
+   uint64_t pos = *offset;
+
+   if (node->start == AMDGPU_BO_INVALID_OFFSET) {
+   pr_debug("drm node is not validated\n");
+   return 0;
+   }
+
+   pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
+node->size);
+
+   if (pos >= node->size) {
+   do  {
+   pos -= node->size;
+   node++;
+   } while (pos >= node->size);
+
+   *mm_node = node;
+   *offset = pos;
+   }
+
+   return (node->start + pos) << PAGE_SHIFT;
+}
+
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
+{
+   return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
+}
+
+static void
+svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
+{
+   struct page *page;
+
+   page = pfn_to_page(pfn);
+   page->zone_device_data = prange;
+   get_page(page);
+   lock_page(page);
+}
+
+static void
+svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
+{
+   struct page *page;
+
+   page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
+   unlock_page(page);
+   put_page(page);
+}
+
+
+static int
+svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+struct migrate_vma *migrate, struct dma_fence **mfence,
+dma_addr_t *scratch)
+{
+   uint64_t npages = migrate->cpages;
+   struct device *dev = adev->dev;
+   struct drm_mm_node *node;
+   dma_addr_t *src;
+   uint64_t *dst;
+   uint64_t vram_addr;
+   uint64_t offset;
+   uint64_t i, j;
+   int r = -ENOMEM;
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last);
+
+   src = scratch;
+   dst = (uint64_t *)(scratch + npages);
+
+   r = svm_range_vram_node_new(adev, prange, true);
+   if (r) {
+   pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
+   goto out;
+   }
+
+   node = prange->ttm_res->mm_node;
+   offset = prange->offset;
+   vram_addr = svm_migrate_node_physical_addr(adev, , );
+   if (!vram_addr) {
+   WARN_ONCE(1, "vram node address is 0\n");
+   r = -ENOMEM;
+   goto out;
+   }
+
+   for (i = j = 0; i < npages; i++) {
+   struct page *spage;
+
+   dst[i] = vram_addr + (j << PAGE_SHIFT);
+   migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+   svm_migrate_get_vram_page(prange, migrate->dst[i]);
+
+   migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+   migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+
+   if (migrate->src[i] & MIGRATE_PFN_VALID) {
+   spage = migrate_pfn_to_page(migrate->src[i]);
+   

[PATCH 16/34] drm/amdkfd: support xgmi same hive mapping

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

amdgpu_gmc_get_vm_pte uses the bo_va->is_xgmi same-hive information to set
the pte flags used to update the GPU mapping. Add a local structure variable
bo_va, update bo_va.is_xgmi, and pass it to mapping->bo_va while mapping to the GPU.

Assuming xgmi pstate is hi after boot.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f01bc3b4010d..f557f67b9d2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -26,6 +26,8 @@
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_mn.h"
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
@@ -953,21 +955,27 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 struct svm_range *prange, dma_addr_t *dma_addr,
-struct dma_fence **fence)
+struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
+   struct amdgpu_bo_va bo_va;
uint64_t pte_flags;
int r = 0;
 
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
 prange->last);
 
+   if (prange->svm_bo && prange->ttm_res) {
+   bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
+   prange->mapping.bo_va = _va;
+   }
+
prange->mapping.start = prange->start;
prange->mapping.last = prange->last;
prange->mapping.offset = prange->offset;
pte_flags = svm_range_get_pte_flags(adev, prange);
prange->mapping.flags = pte_flags;
 
-   r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL,
+   r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
prange->mapping.start,
prange->mapping.last, pte_flags,
prange->mapping.offset,
@@ -990,6 +998,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
*fence = dma_fence_get(vm->last_update);
 
 out:
+   prange->mapping.bo_va = NULL;
return r;
 }
 
@@ -997,12 +1006,18 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange,
 unsigned long *bitmap, bool wait)
 {
struct kfd_process_device *pdd;
+   struct amdgpu_device *bo_adev;
struct amdgpu_device *adev;
struct kfd_process *p;
struct dma_fence *fence = NULL;
uint32_t gpuidx;
int r = 0;
 
+   if (prange->svm_bo && prange->ttm_res)
+   bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+   else
+   bo_adev = NULL;
+
p = container_of(prange->svms, struct kfd_process, svms);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
@@ -1016,8 +1031,14 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange,
if (IS_ERR(pdd))
return -EINVAL;
 
+   if (bo_adev && adev != bo_adev &&
+   !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+   pr_debug("cannot map to device idx %d\n", gpuidx);
+   continue;
+   }
+
r = svm_range_map_to_gpu(adev, pdd->vm, prange,
-prange->dma_addr[gpuidx],
+prange->dma_addr[gpuidx], bo_adev,
 wait ?  : NULL);
if (r)
break;
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 14/34] drm/amdkfd: register HMM device private zone

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Register vram memory as MEMORY_DEVICE_PRIVATE type resource, to
allocate vram backing pages for page migration.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/Kconfig   |   1 +
 drivers/gpu/drm/amd/amdkfd/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c  |   4 +
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 103 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |  48 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   3 +
 6 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig 
b/drivers/gpu/drm/amd/amdkfd/Kconfig
index f02c938f75da..7880fc101a3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -8,6 +8,7 @@ config HSA_AMD
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select HMM_MIRROR
+   select DEVICE_PRIVATE
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
help
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 387ce0217d35..a93301dbc464 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -55,7 +55,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o \
-   $(AMDKFD_PATH)/kfd_svm.o
+   $(AMDKFD_PATH)/kfd_svm.o \
+   $(AMDKFD_PATH)/kfd_migrate.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f860cd705961..918aa03a9e27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -30,6 +30,7 @@
 #include "kfd_iommu.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
+#include "kfd_migrate.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -814,6 +815,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
kfd_cwsr_init(kfd);
 
+   svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+
if (kfd_resume(kfd))
goto kfd_resume_error;
 
@@ -862,6 +865,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 {
if (kfd->init_complete) {
kgd2kfd_suspend(kfd, false);
+   svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
device_queue_manager_uninit(kfd->dqm);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
new file mode 100644
index ..4bb39c562665
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mn.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+
+static void svm_migrate_page_free(struct page *page)
+{
+}
+
+/**
+ * svm_migrate_to_ram - CPU page fault handler
+ * @vmf: CPU vm fault vma, address
+ *
+ * Context: vm fault handler, mm->mmap_sem is taken
+ *
+ * Return:
+ * 0 - OK
+ * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault
+ */
+static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+{
+   return VM_FAULT_SIGBUS;
+}
+
+static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
+   .page_free  = svm_migrate_page_free,
+   .migrate_to_ram = svm_migrate_to_ram,
+};
+
+int svm_migrate_init(struct 

[PATCH 15/34] drm/amdkfd: validate vram svm range from TTM

2021-03-31 Thread Felix Kuehling
If the svm range prefetch location is not zero, use TTM to allocate
amdgpu_bo vram nodes to validate the svm range, then map the vram nodes to GPUs.

Use an offset to sub-allocate from the same amdgpu_bo, to handle overlapping
vram ranges while adding a new range or unmapping a range.

svm_bo has a ref count to track the shared ranges. If all ranges sharing the
amdgpu_bo are migrated to ram, the ref count becomes 0 and the amdgpu_bo is
released; every such range's svm_bo pointer is set to NULL.

To migrate range from ram back to vram, allocate the same amdgpu_bo
with previous offset if the range has svm_bo.

If a prange is migrated to VRAM, no CPU mapping exists, so process exit will
not trigger an unmap callback for this prange to free the prange and svm bo.
Free outstanding pranges from the svms list before the process is freed in
svm_range_list_fini.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 293 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  20 ++
 2 files changed, 306 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b0e0b243754c..f01bc3b4010d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -44,7 +44,8 @@ static const struct mmu_interval_notifier_ops 
svm_range_mn_ops = {
  * svm_range_unlink - unlink svm_range from lists and interval tree
  * @prange: svm range structure to be removed
  *
- * Remove the svm range from svms interval tree and link list
+ * Remove the svm_range from the svms and svm_bo lists and the svms
+ * interval tree.
  *
  * Context: The caller must hold svms->lock
  */
@@ -53,6 +54,12 @@ static void svm_range_unlink(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
 prange, prange->start, prange->last);
 
+   if (prange->svm_bo) {
+   spin_lock(>svm_bo->list_lock);
+   list_del(>svm_bo_list);
+   spin_unlock(>svm_bo->list_lock);
+   }
+
list_del(>list);
if (prange->it_node.start != 0 && prange->it_node.last != 0)
interval_tree_remove(>it_node, >svms->objects);
@@ -217,6 +224,7 @@ static void svm_range_free(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   svm_range_vram_node_free(prange);
svm_range_free_dma_mappings(prange);
mutex_destroy(>lock);
kfree(prange);
@@ -251,6 +259,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>update_list);
INIT_LIST_HEAD(>remove_list);
INIT_LIST_HEAD(>insert_list);
+   INIT_LIST_HEAD(>svm_bo_list);
INIT_LIST_HEAD(>deferred_list);
INIT_LIST_HEAD(>child_list);
atomic_set(>invalid, 0);
@@ -264,6 +273,210 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
return prange;
 }
 
+static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
+{
+   if (!svm_bo || !kref_get_unless_zero(_bo->kref))
+   return false;
+
+   return true;
+}
+
+static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+{
+   if (svm_bo)
+   kref_get(_bo->kref);
+
+   return svm_bo;
+}
+
+static void svm_range_bo_release(struct kref *kref)
+{
+   struct svm_range_bo *svm_bo;
+
+   svm_bo = container_of(kref, struct svm_range_bo, kref);
+   spin_lock(_bo->list_lock);
+   while (!list_empty(_bo->range_list)) {
+   struct svm_range *prange =
+   list_first_entry(_bo->range_list,
+   struct svm_range, svm_bo_list);
+   /* list_del_init tells a concurrent svm_range_vram_node_new when
+* it's safe to reuse the svm_bo pointer and svm_bo_list head.
+*/
+   list_del_init(>svm_bo_list);
+   spin_unlock(_bo->list_lock);
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+prange->start, prange->last);
+   mutex_lock(>lock);
+   prange->svm_bo = NULL;
+   mutex_unlock(>lock);
+
+   spin_lock(_bo->list_lock);
+   }
+   spin_unlock(_bo->list_lock);
+
+   amdgpu_bo_unref(_bo->bo);
+   kfree(svm_bo);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+   if (!svm_bo)
+   return;
+
+   kref_put(_bo->kref, svm_range_bo_release);
+}
+
+static struct svm_range_bo *svm_range_bo_new(void)
+{
+   struct svm_range_bo *svm_bo;
+
+   svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL);
+   if (!svm_bo)
+   return NULL;
+
+   kref_init(_bo->kref);
+   INIT_LIST_HEAD(_bo->range_list);
+   spin_lock_init(_bo->list_lock);
+
+   return svm_bo;
+}
+
+int

[PATCH 13/34] drm/amdkfd: add ioctl to configure and query xnack retries

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Xnack retries are used for page fault recovery. Some AMD chip
families support continuous retries while page table entries are invalid.
The driver must handle the page fault interrupt and fill in a valid entry
for the GPU to continue.

This ioctl allows enabling/disabling XNACK retries per KFD process.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 +++
 include/uapi/linux/kfd_ioctl.h   | 43 +++-
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9511826ac8ae..9838d0cd1f51 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1743,6 +1743,31 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(dev, >anon_fd);
 }
 
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+   struct kfd_process *p, void *data)
+{
+   struct kfd_ioctl_set_xnack_mode_args *args = data;
+   int r = 0;
+
+   mutex_lock(>mutex);
+   if (args->xnack_enabled >= 0) {
+   if (!list_empty(>pqm.queues)) {
+   pr_debug("Process has user queues running\n");
+   mutex_unlock(>mutex);
+   return -EBUSY;
+   }
+   if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
+   r = -EPERM;
+   else
+   p->xnack_enabled = args->xnack_enabled;
+   } else {
+   args->xnack_enabled = p->xnack_enabled;
+   }
+   mutex_unlock(>mutex);
+
+   return r;
+}
+
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
struct kfd_ioctl_svm_args *args = data;
@@ -1869,6 +1894,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
kfd_ioctl_smi_events, 0),
 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
+   kfd_ioctl_set_xnack_mode, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 247b57baa94f..3cb5b5dd9f77 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -597,6 +597,44 @@ struct kfd_ioctl_svm_args {
struct kfd_ioctl_svm_attribute attrs[0];
 };
 
+/**
+ * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode
+ *
+ * @xnack_enabled:   [in/out] Whether to enable XNACK mode for this process
+ *
+ * @xnack_enabled indicates whether recoverable page faults should be
+ * enabled for the current process. 0 means disabled, positive means
+ * enabled, negative means leave unchanged. If enabled, virtual address
+ * translations on GFXv9 and later AMD GPUs can return XNACK and retry
+ * the access until a valid PTE is available. This is used to implement
+ * device page faults.
+ *
+ * On output, @xnack_enabled returns the (new) current mode (0 or
+ * positive). Therefore, a negative input value can be used to query
+ * the current mode without changing it.
+ *
+ * The XNACK mode fundamentally changes the way SVM managed memory works
+ * in the driver, with subtle effects on application performance and
+ * functionality.
+ *
+ * Enabling XNACK mode requires shader programs to be compiled
+ * differently. Furthermore, not all GPUs support changing the mode
+ * per-process. Therefore changing the mode is only allowed while no
+ * user mode queues exist in the process. This ensure that no shader
+ * code is running that may be compiled for the wrong mode. And GPUs
+ * that cannot change to the requested mode will prevent the XNACK
+ * mode from occurring. All GPUs used by the process must be in the
+ * same XNACK mode.
+ *
+ * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM.
+ * Therefore those GPUs are not considered for the XNACK mode switch.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+struct kfd_ioctl_set_xnack_mode_args {
+   __s32 xnack_enabled;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)  _IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)   _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -699,7 +737,10 @@ struct kfd_ioctl_svm_args {
 
 #define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)
 
+#define AMDKFD_IOC_SET_XNACK_MODE  \
+   AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
+
 #define AMDKFD_COMMAND_START   0x01
-#define AMDKFD_COMMAND_END 0x21
+#define AMDKFD_COMMAND_END 0x22
 
 #endif
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 12/34] drm/amdkfd: add xnack enabled flag to kfd_process

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

XNACK mode controls the SQ RETRY_DISABLE setting that determines
whether recoverable page faults can be supported on GFXv9 hardware.
Only on Aldebaran we can support different processes running with
different XNACK modes. On older chips all processes must use the same
RETRY_DISABLE setting. However, processes not relying on recoverable
page faults can work with RETRY enabled. This means XNACK off is always
available as a fallback so we can use the same mode on all GPUs in a
process.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  | 13 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  4 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 45 +++
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index eca6331efa94..b5c3d13643f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
-   if (dqm->dev->noretry &&
-   !dqm->dev->use_iommu_v2)
+
+   if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
+   /* Aldebaran can safely support different XNACK modes
+* per process
+*/
+   if (!pdd->process->xnack_enabled)
+   qpd->sh_mem_config |=
+   1 << 
SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+   } else if (dqm->dev->noretry &&
+  !dqm->dev->use_iommu_v2) {
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+   }
 
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 1b829eef9e50..5c0efaaebd8c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -824,6 +824,8 @@ struct kfd_process {
/* shared virtual memory registered by this process */
struct svm_range_list svms;
bool svm_disabled;
+
+   bool xnack_enabled;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -883,6 +885,8 @@ struct kfd_process_device 
*kfd_get_process_device_data(struct kfd_dev *dev,
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
 
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
+
 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
  struct vm_area_struct *vma);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3bcde43ccd70..f897c1d0ea66 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1193,6 +1193,48 @@ void kfd_process_set_trap_handler(struct 
qcm_process_device *qpd,
}
 }
 
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
+{
+   int i;
+
+   /* On most GFXv9 GPUs, the retry mode in the SQ must match the
+* boot time retry setting. Mixing processes with different
+* XNACK/retry settings can hang the GPU.
+*
+* Different GPUs can have different noretry settings depending
+* on HW bugs or limitations. We need to find at least one
+* XNACK mode for this process that's compatible with all GPUs.
+* Fortunately GPUs with retry enabled (noretry=0) can run code
+* built for XNACK-off. On GFXv9 it may perform slower.
+*
+* Therefore applications built for XNACK-off can always be
+* supported and will be our fallback if any GPU does not
+* support retry.
+*/
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_dev *dev = p->pdds[i]->dev;
+
+   /* Only consider GFXv9 and higher GPUs. Older GPUs don't
+* support the SVM APIs and don't need to be considered
+* for the XNACK mode selection.
+*/
+   if (dev->device_info->asic_family < CHIP_VEGA10)
+   continue;
+   /* Aldebaran can always support XNACK because it can support
+* per-process XNACK mode selection. But let the dev->noretry
+* setting still influence the default XNACK mode.
+*/
+   if (supported &&
+   

[PATCH 11/34] drm/amdgpu: Enable retry faults unconditionally on Aldebaran

2021-03-31 Thread Felix Kuehling
This is needed to allow per-process XNACK mode selection in the SQ when
booting with XNACK off by default.

Signed-off-by: Felix Kuehling 
Reviewed-by: Philip Yang 
Tested-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 8 ++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 6 --
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4bcc03c4c6c5..09f88874bf56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -640,7 +640,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
 
 /**
  * DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
  * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
  */
 MODULE_PARM_DESC(noretry,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 5bb9856bd8a9..f2fb2cac5c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -292,10 +292,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct 
amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
-   /* Send no-retry XNACK on fault to suppress VM fault storm. */
+   /* Send no-retry XNACK on fault to suppress VM fault storm.
+* On Aldebaran, XNACK can be enabled in the SQ per-process.
+* Retry faults need to be enabled for that to work.
+*/
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-   !adev->gmc.noretry);
+   !adev->gmc.noretry ||
+   adev->asic_type == CHIP_ALDEBARAN);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, 
mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 29d7f50912ee..b9d789a9e49e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -298,10 +298,12 @@ static void mmhub_v1_7_setup_vmid_config(struct 
amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
-   /* Send no-retry XNACK on fault to suppress VM fault storm. */
+   /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+* Retry faults need to be enabled for that to work.
+*/
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-   !adev->gmc.noretry);
+   1);
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, 
regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 10/34] drm/amdkfd: svm range eviction and restore

2021-03-31 Thread Felix Kuehling
The HMM interval notifier callback notifies that the CPU page table will be
updated, and stops the process queues if the updated address belongs to an
svm range registered in the process svms objects tree. Restore work is
scheduled to update the GPU page table using the new page addresses in the
updated svm range.

The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 137 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   2 +
 4 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0d19a13fc227..1b829eef9e50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -738,6 +738,8 @@ struct svm_range_list {
struct work_struct  deferred_list_work;
struct list_headdeferred_range_list;
spinlock_t  deferred_list_lock;
+   atomic_tevicted_ranges;
+   struct delayed_work restore_work;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1f3d4f5c64a8..3bcde43ccd70 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct 
mmu_notifier *mn,
 
cancel_delayed_work_sync(>eviction_work);
cancel_delayed_work_sync(>restore_work);
+   cancel_delayed_work_sync(>svms.restore_work);
 
mutex_lock(>mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 4736fe996feb..b0e0b243754c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -21,6 +21,7 @@
  */
 
 #include 
+#include 
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -28,6 +29,8 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
@@ -250,6 +253,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>insert_list);
INIT_LIST_HEAD(>deferred_list);
INIT_LIST_HEAD(>child_list);
+   atomic_set(>invalid, 0);
mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
@@ -964,6 +968,129 @@ svm_range_list_lock_and_flush_work(struct svm_range_list 
*svms,
goto retry_flush_work;
 }
 
+static void svm_range_restore_work(struct work_struct *work)
+{
+   struct delayed_work *dwork = to_delayed_work(work);
+   struct amdkfd_process_info *process_info;
+   struct svm_range_list *svms;
+   struct svm_range *prange;
+   struct kfd_process *p;
+   struct mm_struct *mm;
+   int evicted_ranges;
+   int invalid;
+   int r;
+
+   svms = container_of(dwork, struct svm_range_list, restore_work);
+   evicted_ranges = atomic_read(>evicted_ranges);
+   if (!evicted_ranges)
+   return;
+
+   pr_debug("restore svm ranges\n");
+
+   /* kfd_process_notifier_release destroys this worker thread. So during
+* the lifetime of this thread, kfd_process and mm will be valid.
+*/
+   p = container_of(svms, struct kfd_process, svms);
+   process_info = p->kgd_process_info;
+   mm = p->mm;
+   if (!mm)
+   return;
+
+   mutex_lock(_info->lock);
+   svm_range_list_lock_and_flush_work(svms, mm);
+   mutex_lock(>lock);
+
+   evicted_ranges = atomic_read(>evicted_ranges);
+
+   list_for_each_entry(prange, >list, list) {
+   invalid = atomic_read(>invalid);
+   if (!invalid)
+   continue;
+
+   pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+prange->svms, prange, prange->start, prange->last,
+invalid);
+
+   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+  false, true);
+   if (r) {
+   pr_debug("failed %d to map 0x%lx to gpus\n", r,
+prange->start);
+   goto unlock_out;
+   }
+
+   if (atomic_cmpxchg(>invalid, invalid, 0) != invalid)
+   goto unlock_out;
+   }
+
+   if (atomic_cmpxchg(>evicted_ranges, evicted_ranges, 0) !=
+   evicted_ranges)
+   goto unlock_out;
+
+   evicted_ranges 

[PATCH 08/34] drm/amdgpu: export vm update mapping interface

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

It will be used by kfd to map an svm range to the GPU. Because an svm range
does not have an amdgpu_bo and bo_va, it cannot use the amdgpu_bo_update
interface and instead uses the amdgpu vm update interface directly.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dc4d6ae71476..7e306fd20de4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1592,15 +1592,15 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_vm_update_params *params,
  * Returns:
  * 0 for success, -EINVAL for failure.
  */
-static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-  struct amdgpu_device *bo_adev,
-  struct amdgpu_vm *vm, bool immediate,
-  bool unlocked, struct dma_resv *resv,
-  uint64_t start, uint64_t last,
-  uint64_t flags, uint64_t offset,
-  struct drm_mm_node *nodes,
-  dma_addr_t *pages_addr,
-  struct dma_fence **fence)
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+   struct amdgpu_device *bo_adev,
+   struct amdgpu_vm *vm, bool immediate,
+   bool unlocked, struct dma_resv *resv,
+   uint64_t start, uint64_t last,
+   uint64_t flags, uint64_t offset,
+   struct drm_mm_node *nodes,
+   dma_addr_t *pages_addr,
+   struct dma_fence **fence)
 {
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 976a12e5a8b9..848e175e99ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -366,6 +366,8 @@ struct amdgpu_vm_manager {
spinlock_t  pasid_lock;
 };
 
+struct amdgpu_bo_va_mapping;
+
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) 
((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) 
((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), 
(incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) 
((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), 
(incr), (flags)))
@@ -397,6 +399,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
   struct amdgpu_vm *vm);
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+   struct amdgpu_device *bo_adev,
+   struct amdgpu_vm *vm, bool immediate,
+   bool unlocked, struct dma_resv *resv,
+   uint64_t start, uint64_t last,
+   uint64_t flags, uint64_t offset,
+   struct drm_mm_node *nodes,
+   dma_addr_t *pages_addr,
+   struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 09/34] drm/amdkfd: map svm range to GPUs

2021-03-31 Thread Felix Kuehling
Use amdgpu_vm_bo_update_mapping to update the GPU page tables, mapping or
unmapping the system memory page addresses of an svm range on the GPUs.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 478 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 +
 2 files changed, 479 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index ddb1e2a29881..4736fe996feb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -98,11 +98,123 @@ static void svm_range_remove_notifier(struct svm_range 
*prange)
mmu_interval_notifier_remove(>notifier);
 }
 
+static int
+svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
+ unsigned long *hmm_pfns, uint64_t npages)
+{
+   enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+   dma_addr_t *addr = *dma_addr;
+   struct page *page;
+   int i, r;
+
+   if (!addr) {
+   addr = kvmalloc_array(npages, sizeof(*addr),
+ GFP_KERNEL | __GFP_ZERO);
+   if (!addr)
+   return -ENOMEM;
+   *dma_addr = addr;
+   }
+
+   for (i = 0; i < npages; i++) {
+   if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
+ "leaking dma mapping\n"))
+   dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
+
+   page = hmm_pfn_to_page(hmm_pfns[i]);
+   addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+   r = dma_mapping_error(dev, addr[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   return r;
+   }
+   pr_debug("dma mapping 0x%llx for page addr 0x%lx\n",
+addr[i] >> PAGE_SHIFT, page_to_pfn(page));
+   }
+   return 0;
+}
+
+static int
+svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+ unsigned long *hmm_pfns)
+{
+   struct kfd_process *p;
+   uint32_t gpuidx;
+   int r;
+
+   p = container_of(prange->svms, struct kfd_process, svms);
+
+   for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+   struct kfd_process_device *pdd;
+   struct amdgpu_device *adev;
+
+   pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+   pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+   if (!pdd) {
+   pr_debug("failed to find device idx %d\n", gpuidx);
+   return -EINVAL;
+   }
+   adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+   r = svm_range_dma_map_dev(adev->dev, >dma_addr[gpuidx],
+ hmm_pfns, prange->npages);
+   if (r)
+   break;
+   }
+
+   return r;
+}
+
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+unsigned long offset, unsigned long npages)
+{
+   enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+   int i;
+
+   if (!dma_addr)
+   return;
+
+   for (i = offset; i < offset + npages; i++) {
+   if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
+   continue;
+   pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
+   dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
+   dma_addr[i] = 0;
+   }
+}
+
+static void svm_range_free_dma_mappings(struct svm_range *prange)
+{
+   struct kfd_process_device *pdd;
+   dma_addr_t *dma_addr;
+   struct device *dev;
+   struct kfd_process *p;
+   uint32_t gpuidx;
+
+   p = container_of(prange->svms, struct kfd_process, svms);
+
+   for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+   dma_addr = prange->dma_addr[gpuidx];
+   if (!dma_addr)
+   continue;
+
+   pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+   if (!pdd) {
+   pr_debug("failed to find device idx %d\n", gpuidx);
+   continue;
+   }
+   dev = >dev->pdev->dev;
+   svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+   kvfree(dma_addr);
+   prange->dma_addr[gpuidx] = NULL;
+   }
+}
+
 static void svm_range_free(struct svm_range *prange)
 {
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   svm_range_free_dma_mappings(prange);
mutex_destroy(>lock);
kfree(prange);
 }
@@ -148,6 +260,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
return prange;
 }
 
+static int svm_range_bo_validate(void 

[PATCH 07/34] drm/amdkfd: deregister svm range

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

When an application explicitly calls unmap, or unmap is called from mmput
when the application exits, the driver receives an MMU_NOTIFY_UNMAP event
to first remove the svm range from the process svms object tree and list,
then unmap it from GPUs (in the following patch).

Split the svm ranges to handle partial unmapping of svm ranges. To
avoid deadlocks, updating MMU notifiers, range lists and interval trees
is done in a deferred worker. New child ranges are attached to their
parent range's child_list until the worker can update the
svm_range_list. svm_range_set_attr flushes deferred work and takes the
mmap_write_lock to guarantee that it has an up-to-date svm_range_list.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 285 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |  18 ++
 3 files changed, 305 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7c1d7789b91e..0d19a13fc227 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -735,6 +735,9 @@ struct svm_range_list {
struct mutexlock;
struct rb_root_cached   objects;
struct list_headlist;
+   struct work_struct  deferred_list_work;
+   struct list_headdeferred_range_list;
+   spinlock_t  deferred_list_lock;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c6a9766b97a6..ddb1e2a29881 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -136,6 +136,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>update_list);
INIT_LIST_HEAD(>remove_list);
INIT_LIST_HEAD(>insert_list);
+   INIT_LIST_HEAD(>deferred_list);
+   INIT_LIST_HEAD(>child_list);
mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
@@ -412,6 +414,17 @@ svm_range_split_head(struct svm_range *prange, struct 
svm_range *new,
return r;
 }
 
+void svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+struct svm_range *pchild, enum svm_work_list_ops op)
+{
+   pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+pchild, pchild->start, pchild->last, prange, op);
+
+   pchild->work_item.mm = mm;
+   pchild->work_item.op = op;
+   list_add_tail(>child_list, >child_list);
+}
+
 /*
  * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
  *
@@ -471,6 +484,30 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
return r;
 }
 
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with mmap write lock held, pending deferred work flushed
+ *
+ */
+static void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+  struct mm_struct *mm)
+{
+retry_flush_work:
+   flush_work(>deferred_list_work);
+   mmap_write_lock(mm);
+
+   if (list_empty(>deferred_range_list))
+   return;
+   mmap_write_unlock(mm);
+   pr_debug("retry flush\n");
+   goto retry_flush_work;
+}
+
 struct svm_range *svm_range_clone(struct svm_range *old)
 {
struct svm_range *new;
@@ -611,15 +648,255 @@ svm_range_handle_overlap(struct svm_range_list *svms, 
struct svm_range *new,
return r;
 }
 
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+   struct svm_range *prange)
+{
+   unsigned long start;
+   unsigned long last;
+
+   start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+   last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+   if (prange->start == start && prange->last == last)
+   return;
+
+   pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+ prange->svms, prange, start, last, prange->start,
+ prange->last);
+
+   if (start != 0 && last != 0) {
+   interval_tree_remove(>it_node, >svms->objects);
+   svm_range_remove_notifier(prange);
+   }
+   prange->it_node.start = prange->start;
+   prange->it_node.last = prange->last;
+
+   interval_tree_insert(>it_node, >svms->objects);
+   svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+{
+   struct mm_struct *mm = prange->work_item.mm;
+
+   switch (prange->work_item.op) {
+   case SVM_OP_NULL:
+  

[PATCH 06/34] drm/amdkfd: validate svm range system memory

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Use HMM to get system memory pages address, which will be used to
map to GPUs or migrate to vram.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 116 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  18 +
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index de62265adeaa..c6a9766b97a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -28,6 +28,15 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+   const struct mmu_notifier_range *range,
+   unsigned long cur_seq);
+
+static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
+   .invalidate = svm_range_cpu_invalidate_pagetables,
+};
+
 /**
  * svm_range_unlink - unlink svm_range from lists and interval tree
  * @prange: svm range structure to be removed
@@ -46,6 +55,18 @@ static void svm_range_unlink(struct svm_range *prange)
interval_tree_remove(>it_node, >svms->objects);
 }
 
+static void
+svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
+{
+   pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+prange, prange->start, prange->last);
+
+   mmu_interval_notifier_insert_locked(>notifier, mm,
+prange->start << PAGE_SHIFT,
+prange->npages << PAGE_SHIFT,
+_range_mn_ops);
+}
+
 /**
  * svm_range_add_to_svms - add svm range to svms
  * @prange: svm range structure to be added
@@ -65,11 +86,24 @@ static void svm_range_add_to_svms(struct svm_range *prange)
interval_tree_insert(>it_node, >svms->objects);
 }
 
+static void svm_range_remove_notifier(struct svm_range *prange)
+{
+   pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange,
+prange->notifier.interval_tree.start >> PAGE_SHIFT,
+prange->notifier.interval_tree.last >> PAGE_SHIFT);
+
+   if (prange->notifier.interval_tree.start != 0 &&
+   prange->notifier.interval_tree.last != 0)
+   mmu_interval_notifier_remove(>notifier);
+}
+
 static void svm_range_free(struct svm_range *prange)
 {
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   mutex_destroy(>lock);
kfree(prange);
 }
 
@@ -102,6 +136,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>update_list);
INIT_LIST_HEAD(>remove_list);
INIT_LIST_HEAD(>insert_list);
+   mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
 >granularity, >flags);
@@ -377,6 +412,65 @@ svm_range_split_head(struct svm_range *prange, struct 
svm_range *new,
return r;
 }
 
+/*
+ * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
+ *
+ * To prevent concurrent destruction or change of range attributes, the
+ * svm_read_lock must be held. The caller must not hold the svm_write_lock
+ * because that would block concurrent evictions and lead to deadlocks. To
+ * serialize concurrent migrations or validations of the same range, the
+ * prange->migrate_mutex must be held.
+ *
+ * For VRAM ranges, the SVM BO must be allocated and valid (protected by its
+ * eviction fence.
+ *
+ * The following sequence ensures race-free validation and GPU mapping:
+ *
+ * 1. Reserve page table (and SVM BO if range is in VRAM)
+ * 2. hmm_range_fault to get page addresses (if system memory)
+ * 3. DMA-map pages (if system memory)
+ * 4-a. Take notifier lock
+ * 4-b. Check that pages still valid (mmu_interval_read_retry)
+ * 4-c. Check that the range was not split or otherwise invalidated
+ * 4-d. Update GPU page table
+ * 4.e. Release notifier lock
+ * 5. Release page table (and SVM BO) reservation
+ */
+static int svm_range_validate_and_map(struct mm_struct *mm,
+ struct svm_range *prange,
+ uint32_t gpuidx, bool intr, bool wait)
+{
+   struct hmm_range *hmm_range;
+   int r = 0;
+
+   if (!prange->actual_loc) {
+   r = amdgpu_hmm_range_get_pages(>notifier, mm, NULL,
+  prange->start << PAGE_SHIFT,
+  prange->npages, _range,
+  false, true);
+   if (r) {
+   pr_debug("failed %d to get svm range pages\n", r);
+   goto unreserve_out;
+ 

[PATCH 05/34] drm/amdgpu: add common HMM get pages function

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Move the HMM get pages function from amdgpu_ttm to amdgpu_mn. This
common function will be used by the new svm APIs.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  | 83 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h  |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 76 +++---
 3 files changed, 100 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 828b5167ff12..997da4237a10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -155,3 +155,86 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mmu_interval_notifier_remove(>notifier);
bo->notifier.mm = NULL;
 }
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+  struct mm_struct *mm, struct page **pages,
+  uint64_t start, uint64_t npages,
+  struct hmm_range **phmm_range, bool readonly,
+  bool mmap_locked)
+{
+   struct hmm_range *hmm_range;
+   unsigned long timeout;
+   unsigned long i;
+   unsigned long *pfns;
+   int r = 0;
+
+   hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+   if (unlikely(!hmm_range))
+   return -ENOMEM;
+
+   pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_range;
+   }
+
+   hmm_range->notifier = notifier;
+   hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+   if (!readonly)
+   hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+   hmm_range->hmm_pfns = pfns;
+   hmm_range->start = start;
+   hmm_range->end = start + npages * PAGE_SIZE;
+   timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+retry:
+   hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+
+   if (likely(!mmap_locked))
+   mmap_read_lock(mm);
+
+   r = hmm_range_fault(hmm_range);
+
+   if (likely(!mmap_locked))
+   mmap_read_unlock(mm);
+   if (unlikely(r)) {
+   /*
+* FIXME: This timeout should encompass the retry from
+* mmu_interval_read_retry() as well.
+*/
+   if (r == -EBUSY && !time_after(jiffies, timeout))
+   goto retry;
+   goto out_free_pfns;
+   }
+
+   /*
+* Due to default_flags, all pages are HMM_PFN_VALID or
+* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+* the notifier_lock, and mmu_interval_read_retry() must be done first.
+*/
+   for (i = 0; pages && i < npages; i++)
+   pages[i] = hmm_pfn_to_page(pfns[i]);
+
+   *phmm_range = hmm_range;
+
+   return 0;
+
+out_free_pfns:
+   kvfree(pfns);
+out_free_range:
+   kfree(hmm_range);
+
+   return r;
+}
+
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+{
+   int r;
+
+   r = mmu_interval_read_retry(hmm_range->notifier,
+   hmm_range->notifier_seq);
+   kvfree(hmm_range->hmm_pfns);
+   kfree(hmm_range);
+
+   return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index a292238f75eb..7f7d37a457c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,6 +30,13 @@
 #include 
 #include 
 
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+  struct mm_struct *mm, struct page **pages,
+  uint64_t start, uint64_t npages,
+  struct hmm_range **phmm_range, bool readonly,
+  bool mmap_locked);
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+
 #if defined(CONFIG_HMM_MIRROR)
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 41a4c456961c..a2585058e65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -32,7 +32,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -670,10 +669,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
-   struct hmm_range *range;
-   unsigned long timeout;
struct mm_struct *mm;
-   unsigned long i;
+   bool readonly;
int r = 0;
 
mm = bo->notifier.mm;
@@ -689,76 +686,26 @@ int 

[PATCH 03/34] drm/amdkfd: register svm range

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

The svm range structure stores the range start address, size, attributes,
flags, prefetch location and a gpu bitmap which indicates which GPUs this
range maps to. The same virtual address is shared by the CPU and GPUs.

Process has svm range list which uses both interval tree and list to
store all svm ranges registered by the process. Interval tree is used by
GPU vm fault handler and CPU page fault handler to get svm range
structure from the specific address. List is used to scan all ranges in
eviction restore work.

No overlapping range intervals [start, last] exist in the svms object
interval tree. If a process registers a new range which overlaps an old
range, the old range is split into 2 ranges depending on whether the
overlap happens at the head or tail part of the old range.

Apply attributes preferred location, prefetch location, mapping flags,
migration granularity to svm range, store mapping gpu index into bitmap.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  17 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   8 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   9 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 729 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  85 +++
 6 files changed, 850 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index e1e4115dcf78..387ce0217d35 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -54,7 +54,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgdev.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
-   $(AMDKFD_PATH)/kfd_crat.o
+   $(AMDKFD_PATH)/kfd_crat.o \
+   $(AMDKFD_PATH)/kfd_svm.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dbc824cc6b32..9511826ac8ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -38,6 +38,7 @@
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
+#include "kfd_svm.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 
@@ -1744,11 +1745,27 @@ static int kfd_ioctl_smi_events(struct file *filep,
 
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
+   struct kfd_ioctl_svm_args *args = data;
int r = 0;
 
if (p->svm_disabled)
return -EPERM;
 
+   pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
+args->start_addr, args->size, args->op, args->nattr);
+
+   if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
+   return -EINVAL;
+   if (!args->start_addr || !args->size)
+   return -EINVAL;
+
+   mutex_lock(>mutex);
+
+   r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
+ args->attrs);
+
+   mutex_unlock(>mutex);
+
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d701b53b9bc3..7c1d7789b91e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -731,6 +731,12 @@ struct kfd_process_device {
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
 
+struct svm_range_list {
+   struct mutexlock;
+   struct rb_root_cached   objects;
+   struct list_headlist;
+};
+
 /* Process data */
 struct kfd_process {
/*
@@ -810,6 +816,8 @@ struct kfd_process {
struct kobject *kobj_queues;
struct attribute attr_pasid;
 
+   /* shared virtual memory registered by this process */
+   struct svm_range_list svms;
bool svm_disabled;
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1a6236317cd5..1f3d4f5c64a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,6 +35,7 @@
 #include 
 #include "amdgpu_amdkfd.h"
 #include "amdgpu.h"
+#include "kfd_svm.h"
 
 struct mm_struct;
 
@@ -42,6 +43,7 @@ struct mm_struct;
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
 #include "kfd_iommu.h"
+#include "kfd_svm.h"
 
 /*
  * List of struct kfd_process (field kfd_process).
@@ -1003,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct 
*work)
kfd_iommu_unbind_process(p);
 
kfd_process_free_outstanding_kfd_bos(p);
+   svm_range_list_fini(p);
 
kfd_process_destroy_pdds(p);

[PATCH 04/34] drm/amdkfd: add svm ioctl GET_ATTR op

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Get the intersection of attributes over all memory in the given
range

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 164 +++
 1 file changed, 164 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 21e6a7959bc7..de62265adeaa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -707,6 +707,167 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, 
uint64_t size,
return r;
 }
 
+static int
+svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+  uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+   DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+   DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+   bool get_preferred_loc = false;
+   bool get_prefetch_loc = false;
+   bool get_granularity = false;
+   bool get_accessible = false;
+   bool get_flags = false;
+   uint64_t last = start + size - 1UL;
+   struct mm_struct *mm = current->mm;
+   uint8_t granularity = 0xff;
+   struct interval_tree_node *node;
+   struct svm_range_list *svms;
+   struct svm_range *prange;
+   uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   uint32_t flags = 0x;
+   int gpuidx;
+   uint32_t i;
+
+   pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", >svms, start,
+start + size - 1, nattr);
+
+   mmap_read_lock(mm);
+   if (!svm_range_is_valid(mm, start, size)) {
+   pr_debug("invalid range\n");
+   mmap_read_unlock(mm);
+   return -EINVAL;
+   }
+   mmap_read_unlock(mm);
+
+   for (i = 0; i < nattr; i++) {
+   switch (attrs[i].type) {
+   case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+   get_preferred_loc = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+   get_prefetch_loc = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_ACCESS:
+   get_accessible = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+   get_flags = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+   get_granularity = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+   case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+   case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+   fallthrough;
+   default:
+   pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
+   return -EINVAL;
+   }
+   }
+
+   svms = >svms;
+
+   mutex_lock(>lock);
+
+   node = interval_tree_iter_first(>objects, start, last);
+   if (!node) {
+   pr_debug("range attrs not found return default values\n");
+   svm_range_set_default_attributes(, _loc,
+, );
+   /* TODO: Automatically create SVM ranges and map them on
+* GPU page faults
+   if (p->xnack_enabled)
+   bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+*/
+
+   goto fill_values;
+   }
+   bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+   bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE);
+
+   while (node) {
+   struct interval_tree_node *next;
+
+   prange = container_of(node, struct svm_range, it_node);
+   next = interval_tree_iter_next(node, start, last);
+
+   if (get_preferred_loc) {
+   if (prange->preferred_loc ==
+   KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+   (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+location != prange->preferred_loc)) {
+   location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   get_preferred_loc = false;
+   } else {
+   location = prange->preferred_loc;
+   }
+   }
+   if (get_prefetch_loc) {
+   if (prange->prefetch_loc ==
+   KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+   (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+prefetch_loc != prange->prefetch_loc)) {
+   prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   get_prefetch_loc = false;
+   } else {

[PATCH 02/34] drm/amdkfd: add svm ioctl API

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Add svm (shared virtual memory) ioctl data structure and API definition.

The svm ioctl API is designed to be extensible in the future. All
operations are provided by a single IOCTL to preserve ioctl number
space. The arguments structure ends with a variable size array of
attributes that can be used to set or get one or multiple attributes.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  12 ++
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c |   4 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 +
 include/uapi/linux/kfd_ioctl.h   | 130 ++-
 5 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 43de260b2230..dbc824cc6b32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1742,6 +1742,16 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(dev, >anon_fd);
 }
 
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+   int r = 0;
+
+   if (p->svm_disabled)
+   return -EPERM;
+
+   return r;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1840,6 +1850,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
kfd_ioctl_smi_events, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index a2c9063076cc..52da1a3b2c7a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_POLARIS12:
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
+   /* VI GPUs cannot support SVM with only
+* 40 bits of virtual address space.
+*/
+   process->svm_disabled |= true;
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 17d91f05afe3..d701b53b9bc3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -809,6 +809,8 @@ struct kfd_process {
struct kobject *kobj;
struct kobject *kobj_queues;
struct attribute attr_pasid;
+
+   bool svm_disabled;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d2cd757670d2..1a6236317cd5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1208,6 +1208,7 @@ static struct kfd_process *create_process(const struct 
task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+   process->svm_disabled = false;
INIT_DELAYED_WORK(>eviction_work, evict_process_worker);
INIT_DELAYED_WORK(>restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index bf5e7d7846dd..247b57baa94f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -30,9 +30,10 @@
  * - 1.1 - initial version
  * - 1.3 - Add SMI events support
  * - 1.4 - Indicate new SRAM EDC bit in device properties
+ * - 1.5 - Add SVM API
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 4
+#define KFD_IOCTL_MINOR_VERSION 5
 
 struct kfd_ioctl_get_version_args {
__u32 major_version;/* from KFD */
@@ -473,6 +474,129 @@ enum kfd_mmio_remap {
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
 };
 
+/* Guarantee host access to memory */
+#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x0001
+/* Fine grained coherency between all devices with access */
+#define KFD_IOCTL_SVM_FLAG_COHERENT0x0002
+/* Use any GPU in same hive as preferred device */
+#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL  0x0004
+/* GPUs only read, allows replication */
+#define KFD_IOCTL_SVM_FLAG_GPU_RO  0x0008
+/* Allow execution on GPU */
+#define KFD_IOCTL_SVM_FLAG_GPU_EXEC0x0010
+/* GPUs mostly read, may allow similar optimizations 

[PATCH 01/34] drm/amdkfd: helper to convert gpu id and idx

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

svm range uses gpu bitmap to store which GPU svm range maps to.
Application pass driver gpu id to specify GPU, the helper is needed to
convert gpu id to gpu bitmap idx.

Access through kfd_process_device pointers array from kfd_process.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 11 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ++
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0b6595f7acda..17d91f05afe3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -842,6 +842,17 @@ struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
+
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
+   uint32_t gpuidx, uint32_t *gpuid) {
+   return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
+}
+static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
+   struct kfd_process *p, uint32_t gpuidx) {
+   return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
+}
+
 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
 int kfd_process_restore_queues(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d4241d29ea94..d2cd757670d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1607,6 +1607,16 @@ int kfd_process_restore_queues(struct kfd_process *p)
return ret;
 }
 
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
+{
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++)
+   if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+   return i;
+   return -EINVAL;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
int ret;
-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 00/34] Add HMM-based SVM memory manager to KFD v3

2021-03-31 Thread Felix Kuehling
Since the last patch series on March 22, I integrated all fixes into
the original patch series. An additional fix was added for handling
failed migrations during GPU page faults. (A bigger rework of
migrations and VRAM mappings will come in the future.) Support for
per-process XNACK mode selection was added for Aldebaran. The
initialization of svm_migrate was moved to happen before
kfd_topology_add_device in order to fix reporting of the
SVMAPI_SUPPORTED capability. An updated kfdtest now checks this
capability before running any SVM tests.

Support for SVM can now be controlled by a Kconfig option added in
patch 34.

This series and the corresponding ROCm Thunk and KFDTest changes are also
available on github:
  https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip
  https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip

Alex Sierra (9):
  drm/amdkfd: helper to convert gpu id and idx
  drm/amdkfd: add xnack enabled flag to kfd_process
  drm/amdkfd: add ioctl to configure and query xnack retries
  drm/amdgpu: enable 48-bit IH timestamp counter
  drm/amdkfd: SVM API call to restore page tables
  drm/amdkfd: add svm_bo reference for eviction fence
  drm/amdgpu: add param bit flag to create SVM BOs
  drm/amdgpu: svm bo enable_signal call condition
  drm/amdgpu: add svm_bo eviction to enable_signal cb

Felix Kuehling (13):
  drm/amdkfd: map svm range to GPUs
  drm/amdkfd: svm range eviction and restore
  drm/amdgpu: Enable retry faults unconditionally on Aldebaran
  drm/amdkfd: validate vram svm range from TTM
  drm/amdkfd: HMM migrate ram to vram
  drm/amdkfd: HMM migrate vram to ram
  drm/amdkfd: invalidate tables on page retry fault
  drm/amdkfd: page table restore through svm API
  drm/amdkfd: add svm_bo eviction mechanism support
  drm/amdkfd: refine migration policy with xnack on
  drm/amdkfd: add svm range validate timestamp
  drm/amdkfd: multiple gpu migrate vram to vram
  drm/amdkfd: Add CONFIG_HSA_AMD_SVM

Philip Yang (12):
  drm/amdkfd: add svm ioctl API
  drm/amdkfd: register svm range
  drm/amdkfd: add svm ioctl GET_ATTR op
  drm/amdgpu: add common HMM get pages function
  drm/amdkfd: validate svm range system memory
  drm/amdkfd: deregister svm range
  drm/amdgpu: export vm update mapping interface
  drm/amdkfd: register HMM device private zone
  drm/amdkfd: support xgmi same hive mapping
  drm/amdkfd: copy memory through gart table
  drm/amdgpu: reserve fence slot to update page table
  drm/amdkfd: Add SVM API support capability bits

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|4 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  |   16 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   13 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|   83 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|7 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   90 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   48 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   11 +
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |8 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c   |6 +-
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c|1 +
 drivers/gpu/drm/amd/amdkfd/Kconfig|   14 +
 drivers/gpu/drm/amd/amdkfd/Makefile   |5 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   64 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |4 +
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  |   13 +-
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  |4 +
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  |  922 ++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h  |   64 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   36 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   82 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 2866 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |  205 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |6 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   10 +-
 include/uapi/linux/kfd_ioctl.h|  171 +-
 28 files changed, 4652 insertions(+), 108 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h

-- 
2.31.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


RE: [PATCH 1/2] drm/virtio: Create Dumb BOs as guest Blobs

2021-03-31 Thread Kasireddy, Vivek
Hi Gerd,

> > If support for Blob resources is available, then dumb BOs created by
> > the driver can be considered as guest Blobs. And, for guest Blobs,
> > there is no need to do any transfers or flushes
> 
> No.  VIRTGPU_BLOB_FLAG_USE_SHAREABLE means the host (aka device in virtio
> terms) *can* create a shared mapping.  So, the guest still needs to 
> send transfer
> commands, and then the device can shortcut the transfer commands on the host 
> side in
> case a shared mapping exists.
[Kasireddy, Vivek] Ok. IIUC, are you saying that the device may or may not 
create a shared
mapping (meaning res->image) and that the driver should not make any 
assumptions about
that and thus still do the transfers and flushes?

Also, could you please briefly explain what does 
VIRTIO_GPU_BLOB_FLAG_USE_MAPPABLE
mean given that the spec does not describe these blob_flags clearly? This is 
what the spec says:

"The driver MUST inform the device if the blob resource is used for
memory access, sharing between driver instances and/or sharing with
other devices. This is done via the \field{blob_flags} field."

And, what should be the default blob_flags value for a dumb bo if the userspace 
does not
specify them?

> 
> flush commands are still needed for dirty tracking.
> 
> > but we do need to do set_scanout even if the FB has not changed as
> > part of plane updates.
> 
> Sounds like you workaround host bugs.  This should not be needed with properly
> implemented flush.
[Kasireddy, Vivek] With the patches I tested with:
https://lists.nongnu.org/archive/html/qemu-devel/2021-03/msg09786.html

I noticed that if we do not have res->image and only have res->blob, we have to 
re-submit the blob/dmabuf and update the displaysurface if guest made updates 
to it 
(in this case same FB) which can only happen if we call set_scanout_blob. IIUC, 
flush
only marks the area as dirty but does not re-submit the updated buffer/blob and 
I see
a flicker if I let it do dpy_gfx_update().

Thanks,
Vivek

> 
> take care,
>   Gerd

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v7 5/8] mm: Device exclusive memory access

2021-03-31 Thread Alistair Popple
On Thursday, 1 April 2021 11:48:13 AM AEDT Jason Gunthorpe wrote:
> On Thu, Apr 01, 2021 at 11:45:57AM +1100, Alistair Popple wrote:
> > On Thursday, 1 April 2021 12:46:04 AM AEDT Jason Gunthorpe wrote:
> > > On Thu, Apr 01, 2021 at 12:27:52AM +1100, Alistair Popple wrote:
> > > > On Thursday, 1 April 2021 12:18:54 AM AEDT Jason Gunthorpe wrote:
> > > > > On Wed, Mar 31, 2021 at 11:59:28PM +1100, Alistair Popple wrote:
> > > > > 
> > > > > > I guess that makes sense as the split could go either way at the
> > > > > > moment but I should add a check to make sure this isn't used with
> > > > > > pinned pages anyway.
> > > > > 
> > > > > Is it possible to have a pinned page under one of these things? If I
> > > > > pin it before you migrate it then it remains pinned but hidden under
> > > > > the swap entry?
> > > > 
> > > > At the moment yes. But I had planned (and this reminded me) to add a 
check 
> > to 
> > > > prevent marking pinned pages for exclusive access. 
> > > 
> > > How do you even do that without races with GUP fast?
> > 
> > Unless I've missed something I think I've convinced myself it should be 
safe 
> > to do the pin check after make_device_exclusive() has replaced all the 
PTEs 
> > with exclusive entries.
> > 
> > GUP fast sequence:
> > 1. Read PTE
> > 2. Pin page
> > 3. Check PTE
> > 4. if PTE changed -> unpin and fallback
> > 
> > If make_device_exclusive() runs after (1) it will either succeed or see 
the 
> > pin from (2) and fail (as desired). GUP should always see the PTE change 
and 
> > fallback which will revoke the exclusive access.
> 
> AFAICT the user can trigger fork at that instant and fork will try to
> copy the desposited migration entry before it has been checked

In that case the child will get a read-only exclusive entry and eventually a 
page copy via do_wp_page() and GUP will fallback (or fail in the case of fast 
only) so the parent's exclusive entry will get removed before the page can be 
pinned and therefore shouldn't split the wrong way.

But that is sounding rather complex, and I am not convinced I haven't missed a 
corner case. It also seems like it shouldn't be necessary to copy exclusive 
entries anyway. I could just remove them and restore the original entry, which 
would be far simpler.

> Jason
> 




___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


linux-next: manual merge of the drm-intel tree with the drm tree

2021-03-31 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the drm-intel tree got a conflict in:

  drivers/gpu/drm/i915/display/intel_display.c

between commit:

  1b321026e213 ("drm/i915: Pass ww ctx to intel_pin_to_display_plane")

from the drm tree and commit:

  61169987c4d9 ("drm/i915: Unify the FB and plane state view information into 
one struct")

from the drm-intel tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/gpu/drm/i915/display/intel_display.c
index aa524eff20e1,bdb2adb4d748..
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@@ -1185,13 -1094,13 +1112,13 @@@ retry
 * mode that matches the user configuration.
 */
ret = i915_vma_pin_fence(vma);
-   if (ret != 0 && INTEL_GEN(dev_priv) < 4) {
+   if (ret != 0 && DISPLAY_VER(dev_priv) < 4) {
i915_vma_unpin(vma);
 -  vma = ERR_PTR(ret);
 -  goto err;
 +  goto err_unpin;
}
 +  ret = 0;
  
 -  if (ret == 0 && vma->fence)
 +  if (vma->fence)
*out_flags |= PLANE_HAS_FENCE;
}
  
@@@ -11363,12 -10508,20 +10536,12 @@@ int intel_plane_pin_fb(struct intel_pla
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
struct drm_framebuffer *fb = plane_state->hw.fb;
struct i915_vma *vma;
 +  bool phys_cursor =
 +  plane->id == PLANE_CURSOR &&
 +  INTEL_INFO(dev_priv)->display.cursor_needs_physical;
  
 -  if (plane->id == PLANE_CURSOR &&
 -  INTEL_INFO(dev_priv)->display.cursor_needs_physical) {
 -  struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 -  const int align = intel_cursor_alignment(dev_priv);
 -  int err;
 -
 -  err = i915_gem_object_attach_phys(obj, align);
 -  if (err)
 -  return err;
 -  }
 -
 -  vma = intel_pin_and_fence_fb_obj(fb,
 +  vma = intel_pin_and_fence_fb_obj(fb, phys_cursor,
-_state->view,
+_state->view.gtt,
 intel_plane_uses_fence(plane_state),
 _state->flags);
if (IS_ERR(vma))


pgp3DMXaZxfOQ.pgp
Description: OpenPGP digital signature
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 3/3] drm/mediatek: dpi: add bus format negotiation

2021-03-31 Thread CK Hu
Hi, Jitao:

On Tue, 2021-03-30 at 23:53 +0800, Jitao Shi wrote:
> Add the atomic_get_output_bus_fmts, atomic_get_input_bus_fmts to negotiate
> the possible output and input formats for the current mode and monitor,
> and use the negotiated formats in a basic atomic_check callback.
> 
> Signed-off-by: Jitao Shi 
> ---
>  drivers/gpu/drm/mediatek/mtk_dpi.c | 96 
> --
>  1 file changed, 91 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c 
> b/drivers/gpu/drm/mediatek/mtk_dpi.c
> index 87bb27649c4c..4e45d1b01b0c 100644
> --- a/drivers/gpu/drm/mediatek/mtk_dpi.c
> +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
> @@ -81,6 +81,8 @@ struct mtk_dpi {
>   struct pinctrl *pinctrl;
>   struct pinctrl_state *pins_gpio;
>   struct pinctrl_state *pins_dpi;
> + unsigned int in_bus_format;
> + unsigned int out_bus_format;

Why do you keep these two value? You does not use them.

>   bool ddr_edge_sel;
>   int refcount;
>  };
> @@ -534,6 +536,92 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
>   return 0;
>  }
>  
> +#define MAX_OUTPUT_SEL_FORMATS   2
> +
> +static u32 *mtk_dpi_bridge_atomic_get_output_bus_fmts(struct drm_bridge 
> *bridge,
> + struct drm_bridge_state *bridge_state,
> + struct drm_crtc_state *crtc_state,
> + struct drm_connector_state *conn_state,
> + unsigned int *num_output_fmts)
> +{
> + struct drm_display_mode *mode = _state->mode;
> + u32 *output_fmts;
> + struct mtk_dpi *dpi = bridge_to_dpi(bridge);
> +
> + *num_output_fmts = 0;
> +
> + output_fmts = kcalloc(MAX_OUTPUT_SEL_FORMATS, sizeof(*output_fmts),
> +   GFP_KERNEL);
> + if (!output_fmts)
> + return NULL;
> +
> + /* Default 8bit RGB fallback */
> + if (dpi->conf->dual_edge) {
> + output_fmts[0] =  MEDIA_BUS_FMT_RGB888_2X12_LE;
> + output_fmts[1] =  MEDIA_BUS_FMT_RGB888_2X12_BE;

So mt8183 does not support MEDIA_BUS_FMT_RGB888_1X24?

> + *num_output_fmts = 2;
> + } else {
> + output_fmts[0] =  MEDIA_BUS_FMT_RGB888_1X24;
> + *num_output_fmts = 1;
> + }
> +
> + return output_fmts;
> +}
> +
> +#define MAX_INPUT_SEL_FORMATS1
> +
> +static u32 *mtk_dpi_bridge_atomic_get_input_bus_fmts(struct drm_bridge 
> *bridge,
> + struct drm_bridge_state *bridge_state,
> + struct drm_crtc_state *crtc_state,
> + struct drm_connector_state *conn_state,
> + u32 output_fmt,
> + unsigned int *num_input_fmts)
> +{
> + u32 *input_fmts;
> +
> + *num_input_fmts = 0;
> +
> + input_fmts = kcalloc(MAX_INPUT_SEL_FORMATS, sizeof(*input_fmts),
> +  GFP_KERNEL);
> + if (!input_fmts)
> + return NULL;
> +
> + *num_input_fmts = 1;
> + input_fmts[0] = MEDIA_BUS_FMT_RGB888_1X24;
> +
> + return input_fmts;
> +}
> +
> +static int mtk_dpi_bridge_atomic_check(struct drm_bridge *bridge,
> +struct drm_bridge_state *bridge_state,
> +struct drm_crtc_state *crtc_state,
> +struct drm_connector_state *conn_state)
> +{
> + struct mtk_dpi *dpi = bridge->driver_private;
> +
> + dpi->out_bus_format = bridge_state->output_bus_cfg.format;
> +
> + dpi->in_bus_format = bridge_state->input_bus_cfg.format;
> +
> + dev_dbg(dpi->dev, "input format 0x%04x, output format 0x%04x\n",
> + bridge_state->input_bus_cfg.format,
> + bridge_state->output_bus_cfg.format);
> +
> + if (dpi->out_bus_format == MEDIA_BUS_FMT_RGB888_2X12_LE ||
> + dpi->out_bus_format == MEDIA_BUS_FMT_RGB888_2X12_BE) {

I think you could remove this 'if' checking.

Regards,
CK.

> + dpi->ddr_edge_sel =
> + (dpi->out_bus_format == MEDIA_BUS_FMT_RGB888_2X12_LE) ?
> +  true : false;
> + }
> +
> + dpi->bit_num = MTK_DPI_OUT_BIT_NUM_8BITS;
> + dpi->channel_swap = MTK_DPI_OUT_CHANNEL_SWAP_RGB;
> + dpi->yc_map = MTK_DPI_OUT_YC_MAP_RGB;
> + dpi->color_format = MTK_DPI_COLOR_FORMAT_RGB;
> +
> + return 0;
> +}
> +
>  static int mtk_dpi_bridge_attach(struct drm_bridge *bridge,
>enum drm_bridge_attach_flags flags)
>  {
> @@ -572,6 +660,9 @@ static const struct drm_bridge_funcs mtk_dpi_bridge_funcs 
> = {
>   .mode_set = mtk_dpi_bridge_mode_set,
>   .disable = mtk_dpi_bridge_disable,
>   .enable = mtk_dpi_bridge_enable,
> + .atomic_check = mtk_dpi_bridge_atomic_check,
> + .atomic_get_output_bus_fmts = 

Re: [v1] drm/msm/disp/dpu1: fix warn stack reported during dpu resume

2021-03-31 Thread Dmitry Baryshkov

On 01/04/2021 01:47, Rob Clark wrote:

On Wed, Mar 31, 2021 at 9:03 AM Dmitry Baryshkov
 wrote:


On 31/03/2021 14:27, Kalyan Thota wrote:

WARN_ON was introduced by the below commit to catch runtime resumes
that are getting triggered before icc path was set.

"drm/msm/disp/dpu1: icc path needs to be set before dpu runtime resume"

For the targets where the bw scaling is not enabled, this WARN_ON is
a false alarm. Fix the WARN condition appropriately.


Should we change all DPU targets to use bw scaling to the mdp from the
mdss nodes? The limitation to sc7180 looks artificial.


yes, we should, this keeps biting us on 845


Done, 
https://lore.kernel.org/linux-arm-msm/20210401020533.3956787-2-dmitry.barysh...@linaro.org/






Reported-by: Steev Klimaszewski 


Please add Fixes: tag as well


Signed-off-by: Kalyan Thota 
---
   drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  |  8 +---
   drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h  |  9 +
   drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 11 ++-
   3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index cab387f..0071a4d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -294,6 +294,9 @@ static int dpu_kms_parse_data_bus_icc_path(struct dpu_kms 
*dpu_kms)
   struct icc_path *path1;
   struct drm_device *dev = dpu_kms->dev;

+ if (!dpu_supports_bw_scaling(dev))
+ return 0;
+
   path0 = of_icc_get(dev->dev, "mdp0-mem");
   path1 = of_icc_get(dev->dev, "mdp1-mem");

@@ -934,8 +937,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
   DPU_DEBUG("REG_DMA is not defined");
   }

- if (of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss"))
- dpu_kms_parse_data_bus_icc_path(dpu_kms);
+ dpu_kms_parse_data_bus_icc_path(dpu_kms);

   pm_runtime_get_sync(_kms->pdev->dev);

@@ -1198,7 +1200,7 @@ static int __maybe_unused dpu_runtime_resume(struct 
device *dev)

   ddev = dpu_kms->dev;

- WARN_ON(!(dpu_kms->num_paths));
+ WARN_ON((dpu_supports_bw_scaling(ddev) && !dpu_kms->num_paths));
   /* Min vote of BW is required before turning on AXI clk */
   for (i = 0; i < dpu_kms->num_paths; i++)
   icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW));
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index d6717d6..f7bcc0a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -154,6 +154,15 @@ struct vsync_info {

   #define to_dpu_global_state(x) container_of(x, struct dpu_global_state, base)

+/**
+ * dpu_supports_bw_scaling: returns true for drivers that support bw scaling.
+ * @dev: Pointer to drm_device structure
+ */
+static inline int dpu_supports_bw_scaling(struct drm_device *dev)
+{
+ return of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss");
+}
+
   /* Global private object state for tracking resources that are shared across
* multiple kms objects (planes/crtcs/etc).
*/
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
index cd40788..8cd712c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
@@ -41,6 +41,9 @@ static int dpu_mdss_parse_data_bus_icc_path(struct drm_device 
*dev,
   struct icc_path *path0 = of_icc_get(dev->dev, "mdp0-mem");
   struct icc_path *path1 = of_icc_get(dev->dev, "mdp1-mem");

+ if (dpu_supports_bw_scaling(dev))
+ return 0;
+
   if (IS_ERR_OR_NULL(path0))
   return PTR_ERR_OR_ZERO(path0);

@@ -276,11 +279,9 @@ int dpu_mdss_init(struct drm_device *dev)

   DRM_DEBUG("mapped mdss address space @%pK\n", dpu_mdss->mmio);

- if (!of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss")) {
- ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
- if (ret)
- return ret;
- }
+ ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
+ if (ret)
+ return ret;

   mp = _mdss->mp;
   ret = msm_dss_parse_clock(pdev, mp);




--
With best wishes
Dmitry



--
With best wishes
Dmitry
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re:Re: [PATCH] drm/ttm: cleanup coding style a bit

2021-03-31 Thread Bernard


From: "Christian König" 
Date: 2021-03-31 21:15:22
To:  Bernard Zhao ,Huang Rui ,David Airlie 
,Daniel Vetter 
,dri-devel@lists.freedesktop.org,linux-ker...@vger.kernel.org
Cc:  opensource.ker...@vivo.com
Subject: Re: [PATCH] drm/ttm: cleanup coding style a bit>Am 31.03.21 um 15:12 
schrieb Bernard Zhao:
>> Fix sparse warning:
>> drivers/gpu/drm/ttm/ttm_bo.c:52:1: warning: symbol 'ttm_global_mutex' was 
>> not declared. Should it be static?
>> drivers/gpu/drm/ttm/ttm_bo.c:53:10: warning: symbol 'ttm_bo_glob_use_count' 
>> was not declared. Should it be static?
>>
>> Signed-off-by: Bernard Zhao 
>
>You are based on an outdated branch, please rebase on top of drm-misc-next.
>

Hi

Sure, thanks for your review!
I will fix this and resubmit this patch.

BR//Bernard

>Regards,
>Christian.
>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 4 ++--
>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 101a68dc615b..eab21643edfb 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -49,8 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject 
>> *kobj);
>>   /*
>>* ttm_global_mutex - protecting the global BO state
>>*/
>> -DEFINE_MUTEX(ttm_global_mutex);
>> -unsigned ttm_bo_glob_use_count;
>> +static DEFINE_MUTEX(ttm_global_mutex);
>> +static unsigned int ttm_bo_glob_use_count;
>>   struct ttm_bo_global ttm_bo_glob;
>>   EXPORT_SYMBOL(ttm_bo_glob);
>>   
>


___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 1/2] drm/msm/dpu: fill missing details in hw catalog for sdm845 and sm8[12]50

2021-03-31 Thread Dmitry Baryshkov
Fill clk_inefficiency_factor, bw_inefficiency_factor and
min_prefill_lines in hw catalog data for sdm845 and sm8[12]50.

Efficiency factors are blindly copied from sc7180 data, while
min_prefill_lines is based on downstream display driver.

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index 189f3533525c..a9f74c1177dd 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -817,6 +817,8 @@ static const struct dpu_perf_cfg sdm845_perf_data = {
{.rd_enable = 1, .wr_enable = 1},
{.rd_enable = 1, .wr_enable = 0}
},
+   .clk_inefficiency_factor = 105,
+   .bw_inefficiency_factor = 120,
 };
 
 static const struct dpu_perf_cfg sc7180_perf_data = {
@@ -852,6 +854,7 @@ static const struct dpu_perf_cfg sm8150_perf_data = {
.min_core_ib = 240,
.min_llcc_ib = 80,
.min_dram_ib = 80,
+   .min_prefill_lines = 24,
.danger_lut_tbl = {0xf, 0x, 0x0},
.qos_lut_tbl = {
{.nentry = ARRAY_SIZE(sm8150_qos_linear),
@@ -869,6 +872,8 @@ static const struct dpu_perf_cfg sm8150_perf_data = {
{.rd_enable = 1, .wr_enable = 1},
{.rd_enable = 1, .wr_enable = 0}
},
+   .clk_inefficiency_factor = 105,
+   .bw_inefficiency_factor = 120,
 };
 
 static const struct dpu_perf_cfg sm8250_perf_data = {
@@ -877,6 +882,7 @@ static const struct dpu_perf_cfg sm8250_perf_data = {
.min_core_ib = 480,
.min_llcc_ib = 0,
.min_dram_ib = 80,
+   .min_prefill_lines = 35,
.danger_lut_tbl = {0xf, 0x, 0x0},
.qos_lut_tbl = {
{.nentry = ARRAY_SIZE(sc7180_qos_linear),
@@ -894,6 +900,8 @@ static const struct dpu_perf_cfg sm8250_perf_data = {
{.rd_enable = 1, .wr_enable = 1},
{.rd_enable = 1, .wr_enable = 0}
},
+   .clk_inefficiency_factor = 105,
+   .bw_inefficiency_factor = 120,
 };
 
 /*
-- 
2.30.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/2] drm/msm/dpu: always use mdp device to scale bandwidth

2021-03-31 Thread Dmitry Baryshkov
Currently DPU driver scales bandwidth and core clock for sc7180 only,
while the rest of chips get static bandwidth votes. Make all chipsets
scale bandwidth and clock per composition requirements like sc7180 does.
Drop old voting path completely.

Tested on RB3 (SDM845) and RB5 (SM8250).

Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  |  3 +-
 drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 51 +---
 2 files changed, 2 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 85f2c3564c96..fb061e666faa 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -933,8 +933,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
DPU_DEBUG("REG_DMA is not defined");
}
 
-   if (of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss"))
-   dpu_kms_parse_data_bus_icc_path(dpu_kms);
+   dpu_kms_parse_data_bus_icc_path(dpu_kms);
 
pm_runtime_get_sync(_kms->pdev->dev);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
index cd4078807db1..3416e9617ee9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
@@ -31,40 +31,8 @@ struct dpu_mdss {
void __iomem *mmio;
struct dss_module_power mp;
struct dpu_irq_controller irq_controller;
-   struct icc_path *path[2];
-   u32 num_paths;
 };
 
-static int dpu_mdss_parse_data_bus_icc_path(struct drm_device *dev,
-   struct dpu_mdss *dpu_mdss)
-{
-   struct icc_path *path0 = of_icc_get(dev->dev, "mdp0-mem");
-   struct icc_path *path1 = of_icc_get(dev->dev, "mdp1-mem");
-
-   if (IS_ERR_OR_NULL(path0))
-   return PTR_ERR_OR_ZERO(path0);
-
-   dpu_mdss->path[0] = path0;
-   dpu_mdss->num_paths = 1;
-
-   if (!IS_ERR_OR_NULL(path1)) {
-   dpu_mdss->path[1] = path1;
-   dpu_mdss->num_paths++;
-   }
-
-   return 0;
-}
-
-static void dpu_mdss_icc_request_bw(struct msm_mdss *mdss)
-{
-   struct dpu_mdss *dpu_mdss = to_dpu_mdss(mdss);
-   int i;
-   u64 avg_bw = dpu_mdss->num_paths ? MAX_BW / dpu_mdss->num_paths : 0;
-
-   for (i = 0; i < dpu_mdss->num_paths; i++)
-   icc_set_bw(dpu_mdss->path[i], avg_bw, kBps_to_icc(MAX_BW));
-}
-
 static void dpu_mdss_irq(struct irq_desc *desc)
 {
struct dpu_mdss *dpu_mdss = irq_desc_get_handler_data(desc);
@@ -178,8 +146,6 @@ static int dpu_mdss_enable(struct msm_mdss *mdss)
struct dss_module_power *mp = _mdss->mp;
int ret;
 
-   dpu_mdss_icc_request_bw(mdss);
-
ret = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true);
if (ret) {
DPU_ERROR("clock enable failed, ret:%d\n", ret);
@@ -213,15 +179,12 @@ static int dpu_mdss_disable(struct msm_mdss *mdss)
 {
struct dpu_mdss *dpu_mdss = to_dpu_mdss(mdss);
struct dss_module_power *mp = _mdss->mp;
-   int ret, i;
+   int ret;
 
ret = msm_dss_enable_clk(mp->clk_config, mp->num_clk, false);
if (ret)
DPU_ERROR("clock disable failed, ret:%d\n", ret);
 
-   for (i = 0; i < dpu_mdss->num_paths; i++)
-   icc_set_bw(dpu_mdss->path[i], 0, 0);
-
return ret;
 }
 
@@ -232,7 +195,6 @@ static void dpu_mdss_destroy(struct drm_device *dev)
struct dpu_mdss *dpu_mdss = to_dpu_mdss(priv->mdss);
struct dss_module_power *mp = _mdss->mp;
int irq;
-   int i;
 
pm_runtime_suspend(dev->dev);
pm_runtime_disable(dev->dev);
@@ -242,9 +204,6 @@ static void dpu_mdss_destroy(struct drm_device *dev)
msm_dss_put_clk(mp->clk_config, mp->num_clk);
devm_kfree(>dev, mp->clk_config);
 
-   for (i = 0; i < dpu_mdss->num_paths; i++)
-   icc_put(dpu_mdss->path[i]);
-
if (dpu_mdss->mmio)
devm_iounmap(>dev, dpu_mdss->mmio);
dpu_mdss->mmio = NULL;
@@ -276,12 +235,6 @@ int dpu_mdss_init(struct drm_device *dev)
 
DRM_DEBUG("mapped mdss address space @%pK\n", dpu_mdss->mmio);
 
-   if (!of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss")) {
-   ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
-   if (ret)
-   return ret;
-   }
-
mp = _mdss->mp;
ret = msm_dss_parse_clock(pdev, mp);
if (ret) {
@@ -307,8 +260,6 @@ int dpu_mdss_init(struct drm_device *dev)
 
pm_runtime_enable(dev->dev);
 
-   dpu_mdss_icc_request_bw(priv->mdss);
-
return ret;
 
 irq_error:
-- 
2.30.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[pull] amdgpu, amdkfd drm-fixes-5.12

2021-03-31 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.12.

The following changes since commit 09d78dde88ef95a27b54a6e450ee700ccabdf39d:

  Merge tag 'drm-msm-fixes-2021-02-25' of 
https://gitlab.freedesktop.org/drm/msm into drm-fixes (2021-03-26 13:04:17 
+1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.12-2021-03-31

for you to fetch changes up to e3512fb67093fabdf27af303066627b921ee9bd8:

  drm/amdgpu: check alignment on CPU page for bo map (2021-03-31 21:53:38 -0400)


amd-drm-fixes-5.12-2021-03-31:

amdgpu:
- Polaris idle power fix
- VM fix
- Vangogh S3 fix
- Fixes for non-4K page sizes

amdkfd:
- dqm fence memory corruption fix


Alex Deucher (1):
  drm/amdgpu/vangogh: don't check for dpm in is_dpm_running when in suspend

Evan Quan (1):
  drm/amd/pm: no need to force MCLK to highest when no display connected

Huacai Chen (1):
  drm/amdgpu: Set a suitable dev_info.gart_page_size

Nirmoy Das (1):
  drm/amdgpu: fix offset calculation in amdgpu_vm_bo_clear_mappings()

Qu Huang (1):
  drm/amdkfd: dqm fence memory corruption

Xi Ruoyao (1):
  drm/amdgpu: check alignment on CPU page for bo map

 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  8 
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c   |  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  |  5 +
 11 files changed, 26 insertions(+), 20 deletions(-)
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2 4/4] drm/msm: Improved debugfs gem stats

2021-03-31 Thread Rob Clark
From: Rob Clark 

The last patch lost the breakdown of active vs inactive GEM objects in
$debugfs/gem.  But we can add some better stats to summarize not just
active vs inactive, but also purgable/purged to make up for that.

Signed-off-by: Rob Clark 
Tested-by: Douglas Anderson 
Reviewed-by: Douglas Anderson 
---
 drivers/gpu/drm/msm/msm_fb.c  |  3 ++-
 drivers/gpu/drm/msm/msm_gem.c | 31 ---
 drivers/gpu/drm/msm/msm_gem.h | 11 ++-
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index d42f0665359a..91c0e493aed5 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -33,6 +33,7 @@ static const struct drm_framebuffer_funcs 
msm_framebuffer_funcs = {
 #ifdef CONFIG_DEBUG_FS
 void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m)
 {
+   struct msm_gem_stats stats = {};
int i, n = fb->format->num_planes;
 
seq_printf(m, "fb: %dx%d@%4.4s (%2d, ID:%d)\n",
@@ -42,7 +43,7 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, 
struct seq_file *m)
for (i = 0; i < n; i++) {
seq_printf(m, "   %d: offset=%d pitch=%d, obj: ",
i, fb->offsets[i], fb->pitches[i]);
-   msm_gem_describe(fb->obj[i], m);
+   msm_gem_describe(fb->obj[i], m, );
}
 }
 #endif
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 7ca30e36..2ecf7f1cef25 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -873,7 +873,8 @@ static void describe_fence(struct dma_fence *fence, const 
char *type,
fence->seqno);
 }
 
-void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
+void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
+   struct msm_gem_stats *stats)
 {
struct msm_gem_object *msm_obj = to_msm_bo(obj);
struct dma_resv *robj = obj->resv;
@@ -885,11 +886,23 @@ void msm_gem_describe(struct drm_gem_object *obj, struct 
seq_file *m)
 
msm_gem_lock(obj);
 
+   stats->all.count++;
+   stats->all.size += obj->size;
+
+   if (is_active(msm_obj)) {
+   stats->active.count++;
+   stats->active.size += obj->size;
+   }
+
switch (msm_obj->madv) {
case __MSM_MADV_PURGED:
+   stats->purged.count++;
+   stats->purged.size += obj->size;
madv = " purged";
break;
case MSM_MADV_DONTNEED:
+   stats->purgable.count++;
+   stats->purgable.size += obj->size;
madv = " purgeable";
break;
case MSM_MADV_WILLNEED:
@@ -956,20 +969,24 @@ void msm_gem_describe(struct drm_gem_object *obj, struct 
seq_file *m)
 
 void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
 {
+   struct msm_gem_stats stats = {};
struct msm_gem_object *msm_obj;
-   int count = 0;
-   size_t size = 0;
 
seq_puts(m, "   flags   id ref  offset   kaddrsize 
madv  name\n");
list_for_each_entry(msm_obj, list, node) {
struct drm_gem_object *obj = _obj->base;
seq_puts(m, "   ");
-   msm_gem_describe(obj, m);
-   count++;
-   size += obj->size;
+   msm_gem_describe(obj, m, );
}
 
-   seq_printf(m, "Total %d objects, %zu bytes\n", count, size);
+   seq_printf(m, "Total:%4d objects, %9zu bytes\n",
+   stats.all.count, stats.all.size);
+   seq_printf(m, "Active:   %4d objects, %9zu bytes\n",
+   stats.active.count, stats.active.size);
+   seq_printf(m, "Purgable: %4d objects, %9zu bytes\n",
+   stats.purgable.count, stats.purgable.size);
+   seq_printf(m, "Purged:   %4d objects, %9zu bytes\n",
+   stats.purged.count, stats.purged.size);
 }
 #endif
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index e6b28edb1db9..7c7d54bad189 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -158,7 +158,16 @@ struct drm_gem_object *msm_gem_import(struct drm_device 
*dev,
 __printf(2, 3)
 void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...);
 #ifdef CONFIG_DEBUG_FS
-void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
+
+struct msm_gem_stats {
+   struct {
+   unsigned count;
+   size_t size;
+   } all, active, purgable, purged;
+};
+
+void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
+   struct msm_gem_stats *stats);
 void msm_gem_describe_objects(struct list_head *list, struct seq_file *m);
 #endif
 
-- 
2.30.2

___
dri-devel mailing list

[PATCH v2 3/4] drm/msm: Fix debugfs deadlock

2021-03-31 Thread Rob Clark
From: Rob Clark 

In normal cases the gem obj lock is acquired first before mm_lock.  The
exception is iterating the various object lists.  In the shrinker path,
deadlock is avoided by using msm_gem_trylock() and skipping over objects
that cannot be locked.  But for debugfs the straightforward thing is to
split things out into a separate list of all objects protected by its
own lock.

Fixes: d984457b31c4 ("drm/msm: Add priv->mm_lock to protect active/inactive 
lists")
Signed-off-by: Rob Clark 
Tested-by: Douglas Anderson 
---
 drivers/gpu/drm/msm/msm_debugfs.c | 14 +++---
 drivers/gpu/drm/msm/msm_drv.c |  3 +++
 drivers/gpu/drm/msm/msm_drv.h |  9 -
 drivers/gpu/drm/msm/msm_gem.c | 14 +-
 drivers/gpu/drm/msm/msm_gem.h | 10 --
 5 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_debugfs.c 
b/drivers/gpu/drm/msm/msm_debugfs.c
index 85ad0babc326..d611cc8e54a4 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -111,23 +111,15 @@ static const struct file_operations msm_gpu_fops = {
 static int msm_gem_show(struct drm_device *dev, struct seq_file *m)
 {
struct msm_drm_private *priv = dev->dev_private;
-   struct msm_gpu *gpu = priv->gpu;
int ret;
 
-   ret = mutex_lock_interruptible(>mm_lock);
+   ret = mutex_lock_interruptible(>obj_lock);
if (ret)
return ret;
 
-   if (gpu) {
-   seq_printf(m, "Active Objects (%s):\n", gpu->name);
-   msm_gem_describe_objects(>active_list, m);
-   }
-
-   seq_printf(m, "Inactive Objects:\n");
-   msm_gem_describe_objects(>inactive_dontneed, m);
-   msm_gem_describe_objects(>inactive_willneed, m);
+   msm_gem_describe_objects(>objects, m);
 
-   mutex_unlock(>mm_lock);
+   mutex_unlock(>obj_lock);
 
return 0;
 }
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 3462b0ea14c6..1ef1cd0cc714 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -474,6 +474,9 @@ static int msm_drm_init(struct device *dev, const struct 
drm_driver *drv)
 
priv->wq = alloc_ordered_workqueue("msm", 0);
 
+   INIT_LIST_HEAD(>objects);
+   mutex_init(>obj_lock);
+
INIT_LIST_HEAD(>inactive_willneed);
INIT_LIST_HEAD(>inactive_dontneed);
INIT_LIST_HEAD(>inactive_purged);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 503168817e24..c84e6f84cb6d 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -174,7 +174,14 @@ struct msm_drm_private {
struct msm_rd_state *hangrd;   /* debugfs to dump hanging submits */
struct msm_perf_state *perf;
 
-   /*
+   /**
+* List of all GEM objects (mainly for debugfs, protected by obj_lock
+* (acquire before per GEM object lock)
+*/
+   struct list_head objects;
+   struct mutex obj_lock;
+
+   /**
 * Lists of inactive GEM objects.  Every bo is either in one of the
 * inactive lists (depending on whether or not it is shrinkable) or
 * gpu->active_list (for the gpu it is active on[1])
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index bec01bb48fce..7ca30e36 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -961,7 +961,7 @@ void msm_gem_describe_objects(struct list_head *list, 
struct seq_file *m)
size_t size = 0;
 
seq_puts(m, "   flags   id ref  offset   kaddrsize 
madv  name\n");
-   list_for_each_entry(msm_obj, list, mm_list) {
+   list_for_each_entry(msm_obj, list, node) {
struct drm_gem_object *obj = _obj->base;
seq_puts(m, "   ");
msm_gem_describe(obj, m);
@@ -980,6 +980,10 @@ void msm_gem_free_object(struct drm_gem_object *obj)
struct drm_device *dev = obj->dev;
struct msm_drm_private *priv = dev->dev_private;
 
+   mutex_lock(>obj_lock);
+   list_del(_obj->node);
+   mutex_unlock(>obj_lock);
+
mutex_lock(>mm_lock);
if (msm_obj->dontneed)
mark_unpurgable(msm_obj);
@@ -1170,6 +1174,10 @@ static struct drm_gem_object *_msm_gem_new(struct 
drm_device *dev,
list_add_tail(_obj->mm_list, >inactive_willneed);
mutex_unlock(>mm_lock);
 
+   mutex_lock(>obj_lock);
+   list_add_tail(_obj->node, >objects);
+   mutex_unlock(>obj_lock);
+
return obj;
 
 fail:
@@ -1240,6 +1248,10 @@ struct drm_gem_object *msm_gem_import(struct drm_device 
*dev,
list_add_tail(_obj->mm_list, >inactive_willneed);
mutex_unlock(>mm_lock);
 
+   mutex_lock(>obj_lock);
+   list_add_tail(_obj->node, >objects);
+   mutex_unlock(>obj_lock);
+
return obj;
 
 fail:
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 

[PATCH v2 2/4] drm/msm: Avoid mutex in shrinker_count()

2021-03-31 Thread Rob Clark
From: Rob Clark 

When the system is under heavy memory pressure, we can end up with lots
of concurrent calls into the shrinker.  Keeping a running tab on what we
can shrink avoids grabbing a lock in shrinker->count(), and avoids
shrinker->scan() getting called when not profitable.

Also, we can keep purged objects in their own list to avoid re-traversing
them to help cut down time in the critical section further.

Signed-off-by: Rob Clark 
Tested-by: Douglas Anderson 
---
 drivers/gpu/drm/msm/msm_drv.c  |  1 +
 drivers/gpu/drm/msm/msm_drv.h  |  6 ++-
 drivers/gpu/drm/msm/msm_gem.c  | 20 --
 drivers/gpu/drm/msm/msm_gem.h  | 53 --
 drivers/gpu/drm/msm/msm_gem_shrinker.c | 28 ++
 5 files changed, 81 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 4f9fa0189a07..3462b0ea14c6 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -476,6 +476,7 @@ static int msm_drm_init(struct device *dev, const struct 
drm_driver *drv)
 
INIT_LIST_HEAD(>inactive_willneed);
INIT_LIST_HEAD(>inactive_dontneed);
+   INIT_LIST_HEAD(>inactive_purged);
mutex_init(>mm_lock);
 
/* Teach lockdep about lock ordering wrt. shrinker: */
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index a1264cfcac5e..503168817e24 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -179,8 +179,8 @@ struct msm_drm_private {
 * inactive lists (depending on whether or not it is shrinkable) or
 * gpu->active_list (for the gpu it is active on[1])
 *
-* These lists are protected by mm_lock.  If struct_mutex is involved, 
it
-* should be aquired prior to mm_lock.  One should *not* hold mm_lock in
+* These lists are protected by mm_lock (which should be acquired
+* before per GEM object lock).  One should *not* hold mm_lock in
 * get_pages()/vmap()/etc paths, as they can trigger the shrinker.
 *
 * [1] if someone ever added support for the old 2d cores, there could 
be
@@ -188,6 +188,8 @@ struct msm_drm_private {
 */
struct list_head inactive_willneed;  /* inactive + !shrinkable */
struct list_head inactive_dontneed;  /* inactive +  shrinkable */
+   struct list_head inactive_purged;/* inactive +  purged */
+   long shrinkable_count;   /* write access under mm_lock */
struct mutex mm_lock;
 
struct workqueue_struct *wq;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 9d10739c4eb2..bec01bb48fce 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -719,6 +719,7 @@ void msm_gem_purge(struct drm_gem_object *obj)
put_iova_vmas(obj);
 
msm_obj->madv = __MSM_MADV_PURGED;
+   mark_unpurgable(msm_obj);
 
drm_vma_node_unmap(>vma_node, dev->anon_inode->i_mapping);
drm_gem_free_mmap_offset(obj);
@@ -790,10 +791,11 @@ void msm_gem_active_get(struct drm_gem_object *obj, 
struct msm_gpu *gpu)
might_sleep();
WARN_ON(!msm_gem_is_locked(obj));
WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
+   WARN_ON(msm_obj->dontneed);
 
if (msm_obj->active_count++ == 0) {
mutex_lock(>mm_lock);
-   list_del_init(_obj->mm_list);
+   list_del(_obj->mm_list);
list_add_tail(_obj->mm_list, >active_list);
mutex_unlock(>mm_lock);
}
@@ -818,11 +820,19 @@ static void update_inactive(struct msm_gem_object 
*msm_obj)
mutex_lock(>mm_lock);
WARN_ON(msm_obj->active_count != 0);
 
-   list_del_init(_obj->mm_list);
-   if (msm_obj->madv == MSM_MADV_WILLNEED)
+   if (msm_obj->dontneed)
+   mark_unpurgable(msm_obj);
+
+   list_del(_obj->mm_list);
+   if (msm_obj->madv == MSM_MADV_WILLNEED) {
list_add_tail(_obj->mm_list, >inactive_willneed);
-   else
+   } else if (msm_obj->madv == MSM_MADV_DONTNEED) {
list_add_tail(_obj->mm_list, >inactive_dontneed);
+   mark_purgable(msm_obj);
+   } else {
+   WARN_ON(msm_obj->madv != __MSM_MADV_PURGED);
+   list_add_tail(_obj->mm_list, >inactive_purged);
+   }
 
mutex_unlock(>mm_lock);
 }
@@ -971,6 +981,8 @@ void msm_gem_free_object(struct drm_gem_object *obj)
struct msm_drm_private *priv = dev->dev_private;
 
mutex_lock(>mm_lock);
+   if (msm_obj->dontneed)
+   mark_unpurgable(msm_obj);
list_del(_obj->mm_list);
mutex_unlock(>mm_lock);
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 7a9107cf1818..13aabfe92dac 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -50,18 +50,24 @@ struct msm_gem_object {
 */

[PATCH v2 1/4] drm/msm: Remove unused freed llist node

2021-03-31 Thread Rob Clark
From: Rob Clark 

Unused since commit c951a9b284b9 ("drm/msm: Remove msm_gem_free_work")

Signed-off-by: Rob Clark 
Tested-by: Douglas Anderson 
---
 drivers/gpu/drm/msm/msm_gem.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index b3a0a880cbab..7a9107cf1818 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -78,8 +78,6 @@ struct msm_gem_object {
 
struct list_head vmas;/* list of msm_gem_vma */
 
-   struct llist_node freed;
-
/* For physically contiguous buffers.  Used when we don't have
 * an IOMMU.  Also used for stolen/splashscreen buffer.
 */
-- 
2.30.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2 0/4] drm/msm: Shrinker (and related) fixes

2021-03-31 Thread Rob Clark
From: Rob Clark 

I've been spending some time looking into how things behave under high
memory pressure.  The first patch is a random cleanup I noticed along
the way.  The second improves the situation significantly when we are
getting shrinker called from many threads in parallel.  And the last
two are $debugfs/gem fixes I needed so I could monitor the state of GEM
objects (ie. how many are active/purgable/purged) while triggering high
memory pressure.

We could probably go a bit further with dropping the mm_lock in the
shrinker->scan() loop, but this is already a pretty big improvement.
The next step is probably actually to add support to unpin/evict
inactive objects.  (We are part way there since we have already de-
coupled the iova lifetime from the pages lifetime, but there are a
few sharp corners to work through.)

Rob Clark (4):
  drm/msm: Remove unused freed llist node
  drm/msm: Avoid mutex in shrinker_count()
  drm/msm: Fix debugfs deadlock
  drm/msm: Improved debugfs gem stats

 drivers/gpu/drm/msm/msm_debugfs.c  | 14 ++---
 drivers/gpu/drm/msm/msm_drv.c  |  4 ++
 drivers/gpu/drm/msm/msm_drv.h  | 15 --
 drivers/gpu/drm/msm/msm_fb.c   |  3 +-
 drivers/gpu/drm/msm/msm_gem.c  | 65 ++-
 drivers/gpu/drm/msm/msm_gem.h  | 72 +++---
 drivers/gpu/drm/msm/msm_gem_shrinker.c | 28 --
 7 files changed, 150 insertions(+), 51 deletions(-)

-- 
2.30.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v7] drm/loongson: Add DRM Driver for Loongson 7A1000 bridge chip

2021-03-31 Thread lichenyang
This patch adds an initial DRM driver for the Loongson LS7A1000
bridge chip(LS7A). The LS7A bridge chip contains two display
controllers, supporting dual display output. The maximum supported
resolution for each display channel is 1920x1080@60Hz.
At present, DC device detection and DRM driver registration are
completed, the crtc/plane/encoder/connector objects has been
implemented.
On the Loongson 3A4000 CPU and 7A1000 system, we have achieved
dual-screen output, supporting both clone mode and extend mode.

v7:
- The pixel clock is limited to less than 173000.

v6:
- Remove spin_lock in mmio reg read and write.
- TO_UNCAC is replaced with ioremap.
- Fix error arguments in crtc_atomic_enable/disable/mode_valid.

v5:
- Change the name of the chip to LS7A.
- Change magic value in crtc to macros.
- Correct misspelled words.
- Change the register operation function prefix to ls7a.

v4:
- Move the mode_valid function to the crtc.

v3:
- Move the mode_valid function to the connector and optimize it.
- Fix num_crtc calculation method.

v2:
- Complete the case of 32-bit color in CRTC.

Signed-off-by: Chenyang Li 
---
 drivers/gpu/drm/Kconfig   |   2 +
 drivers/gpu/drm/Makefile  |   1 +
 drivers/gpu/drm/loongson/Kconfig  |  14 +
 drivers/gpu/drm/loongson/Makefile |  14 +
 drivers/gpu/drm/loongson/loongson_connector.c |  48 
 drivers/gpu/drm/loongson/loongson_crtc.c  | 243 
 drivers/gpu/drm/loongson/loongson_device.c|  47 +++
 drivers/gpu/drm/loongson/loongson_drv.c   | 270 ++
 drivers/gpu/drm/loongson/loongson_drv.h   | 139 +
 drivers/gpu/drm/loongson/loongson_encoder.c   |  37 +++
 drivers/gpu/drm/loongson/loongson_plane.c | 102 +++
 11 files changed, 917 insertions(+)
 create mode 100644 drivers/gpu/drm/loongson/Kconfig
 create mode 100644 drivers/gpu/drm/loongson/Makefile
 create mode 100644 drivers/gpu/drm/loongson/loongson_connector.c
 create mode 100644 drivers/gpu/drm/loongson/loongson_crtc.c
 create mode 100644 drivers/gpu/drm/loongson/loongson_device.c
 create mode 100644 drivers/gpu/drm/loongson/loongson_drv.c
 create mode 100644 drivers/gpu/drm/loongson/loongson_drv.h
 create mode 100644 drivers/gpu/drm/loongson/loongson_encoder.c
 create mode 100644 drivers/gpu/drm/loongson/loongson_plane.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 0973f408d75f..6ed1b6dc2f25 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -374,6 +374,8 @@ source "drivers/gpu/drm/xen/Kconfig"
 
 source "drivers/gpu/drm/vboxvideo/Kconfig"
 
+source "drivers/gpu/drm/loongson/Kconfig"
+
 source "drivers/gpu/drm/lima/Kconfig"
 
 source "drivers/gpu/drm/panfrost/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index fefaff4c832d..f87da730ea6d 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -119,6 +119,7 @@ obj-$(CONFIG_DRM_PL111) += pl111/
 obj-$(CONFIG_DRM_TVE200) += tve200/
 obj-$(CONFIG_DRM_XEN) += xen/
 obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
+obj-$(CONFIG_DRM_LOONGSON) += loongson/
 obj-$(CONFIG_DRM_LIMA)  += lima/
 obj-$(CONFIG_DRM_PANFROST) += panfrost/
 obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
diff --git a/drivers/gpu/drm/loongson/Kconfig b/drivers/gpu/drm/loongson/Kconfig
new file mode 100644
index ..3cf42a4cca08
--- /dev/null
+++ b/drivers/gpu/drm/loongson/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_LOONGSON
+   tristate "DRM support for LS7A bridge chipset"
+   depends on DRM && PCI
+   depends on CPU_LOONGSON64
+   select DRM_KMS_HELPER
+   select DRM_VRAM_HELPER
+   select DRM_TTM
+   select DRM_TTM_HELPER
+   default n
+   help
+ Support the display controllers found on the Loongson LS7A
+ bridge.
diff --git a/drivers/gpu/drm/loongson/Makefile 
b/drivers/gpu/drm/loongson/Makefile
new file mode 100644
index ..22d063953b78
--- /dev/null
+++ b/drivers/gpu/drm/loongson/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for loongson drm drivers.
+# This driver provides support for the
+# Direct Rendering Infrastructure (DRI)
+
+ccflags-y := -Iinclude/drm
+loongson-y := loongson_drv.o \
+   loongson_crtc.o \
+   loongson_plane.o \
+   loongson_device.o \
+   loongson_connector.o \
+   loongson_encoder.o
+obj-$(CONFIG_DRM_LOONGSON) += loongson.o
diff --git a/drivers/gpu/drm/loongson/loongson_connector.c 
b/drivers/gpu/drm/loongson/loongson_connector.c
new file mode 100644
index ..6b1f0ffa33bd
--- /dev/null
+++ b/drivers/gpu/drm/loongson/loongson_connector.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "loongson_drv.h"
+
+static int loongson_get_modes(struct drm_connector *connector)
+{
+   int count;
+
+   count = drm_add_modes_noedid(connector, 1920, 1080);
+   drm_set_preferred_mode(connector, 1024, 

Re: [PATCH v7 5/8] mm: Device exclusive memory access

2021-03-31 Thread Jason Gunthorpe
On Thu, Apr 01, 2021 at 11:45:57AM +1100, Alistair Popple wrote:
> On Thursday, 1 April 2021 12:46:04 AM AEDT Jason Gunthorpe wrote:
> > On Thu, Apr 01, 2021 at 12:27:52AM +1100, Alistair Popple wrote:
> > > On Thursday, 1 April 2021 12:18:54 AM AEDT Jason Gunthorpe wrote:
> > > > On Wed, Mar 31, 2021 at 11:59:28PM +1100, Alistair Popple wrote:
> > > > 
> > > > > I guess that makes sense as the split could go either way at the
> > > > > moment but I should add a check to make sure this isn't used with
> > > > > pinned pages anyway.
> > > > 
> > > > Is it possible to have a pinned page under one of these things? If I
> > > > pin it before you migrate it then it remains pinned but hidden under
> > > > the swap entry?
> > > 
> > > At the moment yes. But I had planned (and this reminded me) to add a 
> > > check 
> to 
> > > prevent marking pinned pages for exclusive access. 
> > 
> > How do you even do that without races with GUP fast?
> 
> Unless I've missed something I think I've convinced myself it should be safe 
> to do the pin check after make_device_exclusive() has replaced all the PTEs 
> with exclusive entries.
> 
> GUP fast sequence:
> 1. Read PTE
> 2. Pin page
> 3. Check PTE
> 4. if PTE changed -> unpin and fallback
> 
> If make_device_exclusive() runs after (1) it will either succeed or see the 
> pin from (2) and fail (as desired). GUP should always see the PTE change and 
> fallback which will revoke the exclusive access.

AFAICT the user can trigger fork at that instant and fork will try to
copy the desposited migration entry before it has been checked

Jason
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v7 5/8] mm: Device exclusive memory access

2021-03-31 Thread Alistair Popple
On Thursday, 1 April 2021 12:46:04 AM AEDT Jason Gunthorpe wrote:
> On Thu, Apr 01, 2021 at 12:27:52AM +1100, Alistair Popple wrote:
> > On Thursday, 1 April 2021 12:18:54 AM AEDT Jason Gunthorpe wrote:
> > > On Wed, Mar 31, 2021 at 11:59:28PM +1100, Alistair Popple wrote:
> > > 
> > > > I guess that makes sense as the split could go either way at the
> > > > moment but I should add a check to make sure this isn't used with
> > > > pinned pages anyway.
> > > 
> > > Is it possible to have a pinned page under one of these things? If I
> > > pin it before you migrate it then it remains pinned but hidden under
> > > the swap entry?
> > 
> > At the moment yes. But I had planned (and this reminded me) to add a check 
to 
> > prevent marking pinned pages for exclusive access. 
> 
> How do you even do that without races with GUP fast?

Unless I've missed something I think I've convinced myself it should be safe 
to do the pin check after make_device_exclusive() has replaced all the PTEs 
with exclusive entries.

GUP fast sequence:
1. Read PTE
2. Pin page
3. Check PTE
4. if PTE changed -> unpin and fallback

If make_device_exclusive() runs after (1) it will either succeed or see the 
pin from (2) and fail (as desired). GUP should always see the PTE change and 
fallback which will revoke the exclusive access.

 - Alistair

> Jason
> 




___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 2/4] drm/msm: Avoid mutex in shrinker_count()

2021-03-31 Thread Rob Clark
On Wed, Mar 31, 2021 at 4:39 PM Doug Anderson  wrote:
>
> Hi,
>
> On Wed, Mar 31, 2021 at 4:23 PM Rob Clark  wrote:
> >
> > On Wed, Mar 31, 2021 at 3:44 PM Doug Anderson  wrote:
> > >
> > > Hi,
> > >
> > > On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
> > > >
> > > > @@ -818,11 +820,19 @@ static void update_inactive(struct msm_gem_object 
> > > > *msm_obj)
> > > > mutex_lock(>mm_lock);
> > > > WARN_ON(msm_obj->active_count != 0);
> > > >
> > > > +   if (msm_obj->dontneed)
> > > > +   mark_unpurgable(msm_obj);
> > > > +
> > > > list_del_init(_obj->mm_list);
> > > > -   if (msm_obj->madv == MSM_MADV_WILLNEED)
> > > > +   if (msm_obj->madv == MSM_MADV_WILLNEED) {
> > > > list_add_tail(_obj->mm_list, 
> > > > >inactive_willneed);
> > > > -   else
> > > > +   } else if (msm_obj->madv == MSM_MADV_DONTNEED) {
> > > > list_add_tail(_obj->mm_list, 
> > > > >inactive_dontneed);
> > > > +   mark_purgable(msm_obj);
> > > > +   } else {
> > > > +   WARN_ON(msm_obj->madv != __MSM_MADV_PURGED);
> > > > +   list_add_tail(_obj->mm_list, 
> > > > >inactive_purged);
> > >
> > > I'm probably being dense, but what's the point of adding it to the
> > > "inactive_purged" list here? You never look at that list, right? You
> > > already did a list_del_init() on this object's list pointer
> > > ("mm_list"). I don't see how adding it to a bogus list helps with
> > > anything.
> >
> > It preserves the "every bo is in one of these lists" statement, but
> > other than that you are right we aren't otherwise doing anything with
> > that list.  (Or we could replace the list_del_init() with list_del()..
> > I tend to instinctively go for list_del_init())
>
> If you really want this list, it wouldn't hurt to at least have a
> comment saying that it's not used for anything so people like me doing
> go trying to figure out what it's used for. ;-)
>
>
> > > > @@ -198,6 +203,33 @@ static inline bool is_vunmapable(struct 
> > > > msm_gem_object *msm_obj)
> > > > return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
> > > >  }
> > > >
> > > > +static inline void mark_purgable(struct msm_gem_object *msm_obj)
> > > > +{
> > > > +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> > > > +
> > > > +   WARN_ON(!mutex_is_locked(>mm_lock));
> > > > +
> > > > +   if (WARN_ON(msm_obj->dontneed))
> > > > +   return;
> > >
> > > The is_purgeable() function also checks other things besides just
> > > "MSM_MADV_DONTNEED". Do we need to check those too? Specifically:
> > >
> > >  msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach
> > >
> > > ...or is it just being paranoid?
> > >
> > > I guess I'm just worried that if any of those might be important then
> > > we'll consistently report back that we have a count of things that can
> > > be purged but then scan() won't find anything to do. That wouldn't be
> > > great.
> >
> > Hmm, I thought msm_gem_madvise() returned an error instead of allowing
> > MSM_MADV_DONTNEED to be set on imported/exported dma-bufs.. it
> > probably should to be complete (but userspace already knows not to
> > madvise an imported/exported buffer for other reasons.. ie. we can't
> > let a shared buffer end up in the bo cache).  I'll re-work that a bit.
> >
> > The msm_obj->sgt case is a bit more tricky.. that will be the case of
> > a freshly allocated obj that does not have backing pages yet.  But
> > it seems like enough of a corner case, that I'm happy to live with
> > it.. ie. the tricky thing is not leaking decrements of
> > priv->shrinkable_count or underflowing priv->shrinkable_count, and
> > caring about the !msm_obj->sgt case doubles the number of states an
> > object can be in, and the shrinker->count() return value is just an
> > estimate.
>
> I think it's equally important to make sure that we don't constantly
> have a non-zero count and then have scan() do nothing.  If there's a
> transitory blip then it's fine, but it's not OK if it can be steady
> state. Then you end up with:
>
> 1. How many objects do you have to free? 10
> 2. OK, free some. How many did you free? 0
> 3. Oh. You got more to do, I'll call you again.
> 4. Goto #1
>
> ...and it just keeps looping, right?

Looking more closely at vmscan, it looks like we should return
SHRINK_STOP instead of zero

BR,
-R

>
> As long as you're confident that this case can't happen then we're
> probably fine, but good to be careful. Is there any way we can make
> sure that a "freshly allocated object" isn't ever in the "DONTNEED"
> state?
>
>
> > > > +   priv->shrinkable_count += msm_obj->base.size >> PAGE_SHIFT;
> > > > +   msm_obj->dontneed = true;
> > > > +}
> > > > +
> > > > +static inline void mark_unpurgable(struct msm_gem_object *msm_obj)
> > > > +{
> > > > +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> > > > +
> > > > +   

Re: [PATCH 1/4] drm/msm: Remove unused freed llist node

2021-03-31 Thread Doug Anderson
Hi,

On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
>
> From: Rob Clark 
>
> Unused since c951a9b284b907604759628d273901064c60d09f

Not terribly important, but checkpatch always yells at me when I don't
reference commits by saying:

commit c951a9b284b9 ("drm/msm: Remove msm_gem_free_work")


> Signed-off-by: Rob Clark 
> ---
>  drivers/gpu/drm/msm/msm_gem.h | 2 --
>  1 file changed, 2 deletions(-)

Reviewed-by: Douglas Anderson 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 0/4] drm/msm: Shrinker (and related) fixes

2021-03-31 Thread Doug Anderson
Hi,

On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
>
> From: Rob Clark 
>
> I've been spending some time looking into how things behave under high
> memory pressure.  The first patch is a random cleanup I noticed along
> the way.  The second improves the situation significantly when we are
> getting shrinker called from many threads in parallel.  And the last
> two are $debugfs/gem fixes I needed so I could monitor the state of GEM
> objects (ie. how many are active/purgable/purged) while triggering high
> memory pressure.
>
> We could probably go a bit further with dropping the mm_lock in the
> shrinker->scan() loop, but this is already a pretty big improvement.
> The next step is probably actually to add support to unpin/evict
> inactive objects.  (We are part way there since we have already de-
> coupled the iova lifetime from the pages lifetime, but there are a
> few sharp corners to work through.)
>
> Rob Clark (4):
>   drm/msm: Remove unused freed llist node
>   drm/msm: Avoid mutex in shrinker_count()
>   drm/msm: Fix debugfs deadlock
>   drm/msm: Improved debugfs gem stats
>
>  drivers/gpu/drm/msm/msm_debugfs.c  | 14 ++
>  drivers/gpu/drm/msm/msm_drv.c  |  4 ++
>  drivers/gpu/drm/msm/msm_drv.h  | 10 -
>  drivers/gpu/drm/msm/msm_fb.c   |  3 +-
>  drivers/gpu/drm/msm/msm_gem.c  | 61 +-
>  drivers/gpu/drm/msm/msm_gem.h  | 58 +---
>  drivers/gpu/drm/msm/msm_gem_shrinker.c | 17 +--
>  7 files changed, 122 insertions(+), 45 deletions(-)

This makes a pretty big reduction in jankiness when under memory
pressure and seems to work well for me.

Tested-by: Douglas Anderson 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 2/4] drm/msm: Avoid mutex in shrinker_count()

2021-03-31 Thread Doug Anderson
Hi,

On Wed, Mar 31, 2021 at 4:23 PM Rob Clark  wrote:
>
> On Wed, Mar 31, 2021 at 3:44 PM Doug Anderson  wrote:
> >
> > Hi,
> >
> > On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
> > >
> > > @@ -818,11 +820,19 @@ static void update_inactive(struct msm_gem_object 
> > > *msm_obj)
> > > mutex_lock(>mm_lock);
> > > WARN_ON(msm_obj->active_count != 0);
> > >
> > > +   if (msm_obj->dontneed)
> > > +   mark_unpurgable(msm_obj);
> > > +
> > > list_del_init(_obj->mm_list);
> > > -   if (msm_obj->madv == MSM_MADV_WILLNEED)
> > > +   if (msm_obj->madv == MSM_MADV_WILLNEED) {
> > > list_add_tail(_obj->mm_list, 
> > > >inactive_willneed);
> > > -   else
> > > +   } else if (msm_obj->madv == MSM_MADV_DONTNEED) {
> > > list_add_tail(_obj->mm_list, 
> > > >inactive_dontneed);
> > > +   mark_purgable(msm_obj);
> > > +   } else {
> > > +   WARN_ON(msm_obj->madv != __MSM_MADV_PURGED);
> > > +   list_add_tail(_obj->mm_list, >inactive_purged);
> >
> > I'm probably being dense, but what's the point of adding it to the
> > "inactive_purged" list here? You never look at that list, right? You
> > already did a list_del_init() on this object's list pointer
> > ("mm_list"). I don't see how adding it to a bogus list helps with
> > anything.
>
> It preserves the "every bo is in one of these lists" statement, but
> other than that you are right we aren't otherwise doing anything with
> that list.  (Or we could replace the list_del_init() with list_del()..
> I tend to instinctively go for list_del_init())

If you really want this list, it wouldn't hurt to at least have a
comment saying that it's not used for anything so people like me don't
go trying to figure out what it's used for. ;-)


> > > @@ -198,6 +203,33 @@ static inline bool is_vunmapable(struct 
> > > msm_gem_object *msm_obj)
> > > return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
> > >  }
> > >
> > > +static inline void mark_purgable(struct msm_gem_object *msm_obj)
> > > +{
> > > +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> > > +
> > > +   WARN_ON(!mutex_is_locked(>mm_lock));
> > > +
> > > +   if (WARN_ON(msm_obj->dontneed))
> > > +   return;
> >
> > The is_purgeable() function also checks other things besides just
> > "MSM_MADV_DONTNEED". Do we need to check those too? Specifically:
> >
> >  msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach
> >
> > ...or is it just being paranoid?
> >
> > I guess I'm just worried that if any of those might be important then
> > we'll consistently report back that we have a count of things that can
> > be purged but then scan() won't find anything to do. That wouldn't be
> > great.
>
> Hmm, I thought msm_gem_madvise() returned an error instead of allowing
> MSM_MADV_DONTNEED to be set on imported/exported dma-bufs.. it
> probably should to be complete (but userspace already knows not to
> madvise an imported/exported buffer for other reasons.. ie. we can't
> let a shared buffer end up in the bo cache).  I'll re-work that a bit.
>
> The msm_obj->sgt case is a bit more tricky.. that will be the case of
> a freshly allocated obj that does not have backing pages yet.  But
> it seems like enough of a corner case, that I'm happy to live with
> it.. ie. the tricky thing is not leaking decrements of
> priv->shrinkable_count or underflowing priv->shrinkable_count, and
> caring about the !msm_obj->sgt case doubles the number of states an
> object can be in, and the shrinker->count() return value is just an
> estimate.

I think it's equally important to make sure that we don't constantly
have a non-zero count and then have scan() do nothing.  If there's a
transitory blip then it's fine, but it's not OK if it can be steady
state. Then you end up with:

1. How many objects do you have to free? 10
2. OK, free some. How many did you free? 0
3. Oh. You got more to do, I'll call you again.
4. Goto #1

...and it just keeps looping, right?

As long as you're confident that this case can't happen then we're
probably fine, but good to be careful. Is there any way we can make
sure that a "freshly allocated object" isn't ever in the "DONTNEED"
state?


> > > +   priv->shrinkable_count += msm_obj->base.size >> PAGE_SHIFT;
> > > +   msm_obj->dontneed = true;
> > > +}
> > > +
> > > +static inline void mark_unpurgable(struct msm_gem_object *msm_obj)
> > > +{
> > > +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> > > +
> > > +   WARN_ON(!mutex_is_locked(>mm_lock));
> > > +
> > > +   if (WARN_ON(!msm_obj->dontneed))
> > > +   return;
> > > +
> > > +   priv->shrinkable_count -= msm_obj->base.size >> PAGE_SHIFT;
> > > +   WARN_ON(priv->shrinkable_count < 0);
> >
> > If you changed the order maybe you could make shrinkable_count
> > "unsigned long" to match the shrinker API?
> >
> >  new_shrinkable = 

Re: [PATCH 4/4] drm/msm: Improved debugfs gem stats

2021-03-31 Thread Doug Anderson
Hi,

On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
>
> From: Rob Clark 
>
> The last patch lost the breakdown of active vs inactive GEM objects in
> $debugfs/gem.  But we can add some better stats to summarize not just
> active vs inactive, but also purgable/purged to make up for that.
>
> Signed-off-by: Rob Clark 
> ---
>  drivers/gpu/drm/msm/msm_fb.c  |  3 ++-
>  drivers/gpu/drm/msm/msm_gem.c | 31 ---
>  drivers/gpu/drm/msm/msm_gem.h | 11 ++-
>  3 files changed, 36 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
> index d42f0665359a..887172a10c9a 100644
> --- a/drivers/gpu/drm/msm/msm_fb.c
> +++ b/drivers/gpu/drm/msm/msm_fb.c
> @@ -33,6 +33,7 @@ static const struct drm_framebuffer_funcs 
> msm_framebuffer_funcs = {
>  #ifdef CONFIG_DEBUG_FS
>  void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m)
>  {
> +   struct msm_gem_stats stats = {{0}};

nit: instead of "{{0}}", can't you just do:

struct msm_gem_stats stats = {};

...both here and for the other usage.

Other than that this seems good to me.

Reviewed-by: Douglas Anderson 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 3/4] drm/msm: Fix debugfs deadlock

2021-03-31 Thread Rob Clark
On Wed, Mar 31, 2021 at 4:13 PM Doug Anderson  wrote:
>
> Hi,
>
> On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
> >
> > @@ -111,23 +111,15 @@ static const struct file_operations msm_gpu_fops = {
> >  static int msm_gem_show(struct drm_device *dev, struct seq_file *m)
> >  {
> > struct msm_drm_private *priv = dev->dev_private;
> > -   struct msm_gpu *gpu = priv->gpu;
> > int ret;
> >
> > -   ret = mutex_lock_interruptible(>mm_lock);
> > +   ret = mutex_lock_interruptible(>obj_lock);
> > if (ret)
> > return ret;
> >
> > -   if (gpu) {
> > -   seq_printf(m, "Active Objects (%s):\n", gpu->name);
> > -   msm_gem_describe_objects(>active_list, m);
> > -   }
> > -
> > -   seq_printf(m, "Inactive Objects:\n");
> > -   msm_gem_describe_objects(>inactive_dontneed, m);
> > -   msm_gem_describe_objects(>inactive_willneed, m);
> > +   msm_gem_describe_objects(>objects, m);
>
I guess we no longer sort them by Active and Inactive but that doesn't
really matter?

It turned out to be less useful to sort by active/inactive, as much as
just having the summary at the bottom that the last patch adds.  We
can already tell from the per-object entries whether it is
active/purgable/purged.

I did initially try to come up with an approach that let me keep this,
but it would basically amount to re-writing the gem_submit path
(because you cannot do any memory allocation under mm_lock)

>
> > @@ -174,7 +174,13 @@ struct msm_drm_private {
> > struct msm_rd_state *hangrd;   /* debugfs to dump hanging submits */
> > struct msm_perf_state *perf;
> >
> > -   /*
> > +   /**
> > +* List of all GEM objects (mainly for debugfs, protected by 
> > obj_lock
>
> It wouldn't hurt to talk about lock ordering here? Like: "If we need
> the "obj_lock" and a "gem_lock" at the same time we always grab the
> "obj_lock" first.

good point

>
> > @@ -60,13 +60,20 @@ struct msm_gem_object {
> >  */
> > uint8_t vmap_count;
> >
> > -   /* And object is either:
> > -*  inactive - on priv->inactive_list
> > +   /**
> > +* Node in list of all objects (mainly for debugfs, protected by
> > +* struct_mutex
>
> Not "struct_mutex" in comment, right? Maybe "obj_lock" I think?

oh, right, forgot to fix that from an earlier iteration

BR,
-R
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 2/4] drm/msm: Avoid mutex in shrinker_count()

2021-03-31 Thread Rob Clark
On Wed, Mar 31, 2021 at 3:44 PM Doug Anderson  wrote:
>
> Hi,
>
> On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
> >
> > @@ -818,11 +820,19 @@ static void update_inactive(struct msm_gem_object 
> > *msm_obj)
> > mutex_lock(>mm_lock);
> > WARN_ON(msm_obj->active_count != 0);
> >
> > +   if (msm_obj->dontneed)
> > +   mark_unpurgable(msm_obj);
> > +
> > list_del_init(_obj->mm_list);
> > -   if (msm_obj->madv == MSM_MADV_WILLNEED)
> > +   if (msm_obj->madv == MSM_MADV_WILLNEED) {
> > list_add_tail(_obj->mm_list, >inactive_willneed);
> > -   else
> > +   } else if (msm_obj->madv == MSM_MADV_DONTNEED) {
> > list_add_tail(_obj->mm_list, >inactive_dontneed);
> > +   mark_purgable(msm_obj);
> > +   } else {
> > +   WARN_ON(msm_obj->madv != __MSM_MADV_PURGED);
> > +   list_add_tail(_obj->mm_list, >inactive_purged);
>
> I'm probably being dense, but what's the point of adding it to the
> "inactive_purged" list here? You never look at that list, right? You
> already did a list_del_init() on this object's list pointer
> ("mm_list"). I don't see how adding it to a bogus list helps with
> anything.

It preserves the "every bo is in one of these lists" statement, but
other than that you are right we aren't otherwise doing anything with
that list.  (Or we could replace the list_del_init() with list_del()..
I tend to instinctively go for list_del_init())

>
> > @@ -198,6 +203,33 @@ static inline bool is_vunmapable(struct msm_gem_object 
> > *msm_obj)
> > return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
> >  }
> >
> > +static inline void mark_purgable(struct msm_gem_object *msm_obj)
> > +{
> > +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> > +
> > +   WARN_ON(!mutex_is_locked(>mm_lock));
> > +
> > +   if (WARN_ON(msm_obj->dontneed))
> > +   return;
>
> The is_purgeable() function also checks other things besides just
> "MSM_MADV_DONTNEED". Do we need to check those too? Specifically:
>
>  msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach
>
> ...or is it just being paranoid?
>
> I guess I'm just worried that if any of those might be important then
> we'll consistently report back that we have a count of things that can
> be purged but then scan() won't find anything to do. That wouldn't be
> great.

Hmm, I thought msm_gem_madvise() returned an error instead of allowing
MSM_MADV_DONTNEED to be set on imported/exported dma-bufs.. it
probably should to be complete (but userspace already knows not to
madvise an imported/exported buffer for other reasons.. ie. we can't
let a shared buffer end up in the bo cache).  I'll re-work that a bit.

The msm_obj->sgt case is a bit more tricky.. that will be the case of
a freshly allocated obj that does not have backing pages yet.  But
it seems like enough of a corner case, that I'm happy to live with
it.. ie. the tricky thing is not leaking decrements of
priv->shrinkable_count or underflowing priv->shrinkable_count, and
caring about the !msm_obj->sgt case doubles the number of states an
object can be in, and the shrinker->count() return value is just an
estimate.

>
> > +   priv->shrinkable_count += msm_obj->base.size >> PAGE_SHIFT;
> > +   msm_obj->dontneed = true;
> > +}
> > +
> > +static inline void mark_unpurgable(struct msm_gem_object *msm_obj)
> > +{
> > +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> > +
> > +   WARN_ON(!mutex_is_locked(>mm_lock));
> > +
> > +   if (WARN_ON(!msm_obj->dontneed))
> > +   return;
> > +
> > +   priv->shrinkable_count -= msm_obj->base.size >> PAGE_SHIFT;
> > +   WARN_ON(priv->shrinkable_count < 0);
>
> If you changed the order maybe you could make shrinkable_count
> "unsigned long" to match the shrinker API?
>
>  new_shrinkable = msm_obj->base.size >> PAGE_SHIFT;
>  WARN_ON(new_shrinkable > priv->shrinkable_count);
>  priv->shrinkable_count -= new_shrinkable
>

True, although I've developed a preference for signed integers in
cases where it can underflow if you mess up

BR,
-R
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 3/4] drm/msm: Fix debugfs deadlock

2021-03-31 Thread Doug Anderson
Hi,

On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
>
> @@ -111,23 +111,15 @@ static const struct file_operations msm_gpu_fops = {
>  static int msm_gem_show(struct drm_device *dev, struct seq_file *m)
>  {
> struct msm_drm_private *priv = dev->dev_private;
> -   struct msm_gpu *gpu = priv->gpu;
> int ret;
>
> -   ret = mutex_lock_interruptible(>mm_lock);
> +   ret = mutex_lock_interruptible(>obj_lock);
> if (ret)
> return ret;
>
> -   if (gpu) {
> -   seq_printf(m, "Active Objects (%s):\n", gpu->name);
> -   msm_gem_describe_objects(>active_list, m);
> -   }
> -
> -   seq_printf(m, "Inactive Objects:\n");
> -   msm_gem_describe_objects(>inactive_dontneed, m);
> -   msm_gem_describe_objects(>inactive_willneed, m);
> +   msm_gem_describe_objects(>objects, m);

I guess we no longer sort them by Active and Inactive but that doesn't
really matter?


> @@ -174,7 +174,13 @@ struct msm_drm_private {
> struct msm_rd_state *hangrd;   /* debugfs to dump hanging submits */
> struct msm_perf_state *perf;
>
> -   /*
> +   /**
> +* List of all GEM objects (mainly for debugfs, protected by obj_lock

It wouldn't hurt to talk about lock ordering here? Like: "If we need
the "obj_lock" and a "gem_lock" at the same time we always grab the
"obj_lock" first.

> @@ -60,13 +60,20 @@ struct msm_gem_object {
>  */
> uint8_t vmap_count;
>
> -   /* And object is either:
> -*  inactive - on priv->inactive_list
> +   /**
> +* Node in list of all objects (mainly for debugfs, protected by
> +* struct_mutex

Not "struct_mutex" in comment, right? Maybe "obj_lock" I think?

-Doug
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 2/4] drm/msm: Avoid mutex in shrinker_count()

2021-03-31 Thread Doug Anderson
Hi,

On Wed, Mar 31, 2021 at 3:14 PM Rob Clark  wrote:
>
> @@ -818,11 +820,19 @@ static void update_inactive(struct msm_gem_object 
> *msm_obj)
> mutex_lock(>mm_lock);
> WARN_ON(msm_obj->active_count != 0);
>
> +   if (msm_obj->dontneed)
> +   mark_unpurgable(msm_obj);
> +
> list_del_init(_obj->mm_list);
> -   if (msm_obj->madv == MSM_MADV_WILLNEED)
> +   if (msm_obj->madv == MSM_MADV_WILLNEED) {
> list_add_tail(_obj->mm_list, >inactive_willneed);
> -   else
> +   } else if (msm_obj->madv == MSM_MADV_DONTNEED) {
> list_add_tail(_obj->mm_list, >inactive_dontneed);
> +   mark_purgable(msm_obj);
> +   } else {
> +   WARN_ON(msm_obj->madv != __MSM_MADV_PURGED);
> +   list_add_tail(_obj->mm_list, >inactive_purged);

I'm probably being dense, but what's the point of adding it to the
"inactive_purged" list here? You never look at that list, right? You
already did a list_del_init() on this object's list pointer
("mm_list"). I don't see how adding it to a bogus list helps with
anything.


> @@ -198,6 +203,33 @@ static inline bool is_vunmapable(struct msm_gem_object 
> *msm_obj)
> return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
>  }
>
> +static inline void mark_purgable(struct msm_gem_object *msm_obj)
> +{
> +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> +
> +   WARN_ON(!mutex_is_locked(>mm_lock));
> +
> +   if (WARN_ON(msm_obj->dontneed))
> +   return;

The is_purgeable() function also checks other things besides just
"MSM_MADV_DONTNEED". Do we need to check those too? Specifically:

 msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach

...or is it just being paranoid?

I guess I'm just worried that if any of those might be important then
we'll consistently report back that we have a count of things that can
be purged but then scan() won't find anything to do. That wouldn't be
great.


> +   priv->shrinkable_count += msm_obj->base.size >> PAGE_SHIFT;
> +   msm_obj->dontneed = true;
> +}
> +
> +static inline void mark_unpurgable(struct msm_gem_object *msm_obj)
> +{
> +   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
> +
> +   WARN_ON(!mutex_is_locked(>mm_lock));
> +
> +   if (WARN_ON(!msm_obj->dontneed))
> +   return;
> +
> +   priv->shrinkable_count -= msm_obj->base.size >> PAGE_SHIFT;
> +   WARN_ON(priv->shrinkable_count < 0);

If you changed the order maybe you could make shrinkable_count
"unsigned long" to match the shrinker API?

 new_shrinkable = msm_obj->base.size >> PAGE_SHIFT;
 WARN_ON(new_shrinkable > priv->shrinkable_count);
 priv->shrinkable_count -= new_shrinkable


-Doug
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [v1] drm/msm/disp/dpu1: fix warn stack reported during dpu resume

2021-03-31 Thread Rob Clark
On Wed, Mar 31, 2021 at 9:03 AM Dmitry Baryshkov
 wrote:
>
> On 31/03/2021 14:27, Kalyan Thota wrote:
> > WARN_ON was introduced by the below commit to catch runtime resumes
> > that are getting triggered before icc path was set.
> >
> > "drm/msm/disp/dpu1: icc path needs to be set before dpu runtime resume"
> >
> > For the targets where the bw scaling is not enabled, this WARN_ON is
> > a false alarm. Fix the WARN condition appropriately.
>
> Should we change all DPU targets to use bw scaling to the mdp from the
> mdss nodes? The limitation to sc7180 looks artificial.

yes, we should, this keeps biting us on 845

> >
> > Reported-by: Steev Klimaszewski 

Please add Fixes: tag as well

> > Signed-off-by: Kalyan Thota 
> > ---
> >   drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  |  8 +---
> >   drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h  |  9 +
> >   drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 11 ++-
> >   3 files changed, 20 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
> > b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> > index cab387f..0071a4d 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> > @@ -294,6 +294,9 @@ static int dpu_kms_parse_data_bus_icc_path(struct 
> > dpu_kms *dpu_kms)
> >   struct icc_path *path1;
> >   struct drm_device *dev = dpu_kms->dev;
> >
> > + if (!dpu_supports_bw_scaling(dev))
> > + return 0;
> > +
> >   path0 = of_icc_get(dev->dev, "mdp0-mem");
> >   path1 = of_icc_get(dev->dev, "mdp1-mem");
> >
> > @@ -934,8 +937,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
> >   DPU_DEBUG("REG_DMA is not defined");
> >   }
> >
> > - if (of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss"))
> > - dpu_kms_parse_data_bus_icc_path(dpu_kms);
> > + dpu_kms_parse_data_bus_icc_path(dpu_kms);
> >
> >   pm_runtime_get_sync(_kms->pdev->dev);
> >
> > @@ -1198,7 +1200,7 @@ static int __maybe_unused dpu_runtime_resume(struct 
> > device *dev)
> >
> >   ddev = dpu_kms->dev;
> >
> > - WARN_ON(!(dpu_kms->num_paths));
> > + WARN_ON((dpu_supports_bw_scaling(ddev) && !dpu_kms->num_paths));
> >   /* Min vote of BW is required before turning on AXI clk */
> >   for (i = 0; i < dpu_kms->num_paths; i++)
> >   icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW));
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h 
> > b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
> > index d6717d6..f7bcc0a 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
> > @@ -154,6 +154,15 @@ struct vsync_info {
> >
> >   #define to_dpu_global_state(x) container_of(x, struct dpu_global_state, 
> > base)
> >
> > +/**
> > + * dpu_supports_bw_scaling: returns true for drivers that support bw 
> > scaling.
> > + * @dev: Pointer to drm_device structure
> > + */
> > +static inline int dpu_supports_bw_scaling(struct drm_device *dev)
> > +{
> > + return of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss");
> > +}
> > +
> >   /* Global private object state for tracking resources that are shared 
> > across
> >* multiple kms objects (planes/crtcs/etc).
> >*/
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c 
> > b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> > index cd40788..8cd712c 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> > @@ -41,6 +41,9 @@ static int dpu_mdss_parse_data_bus_icc_path(struct 
> > drm_device *dev,
> >   struct icc_path *path0 = of_icc_get(dev->dev, "mdp0-mem");
> >   struct icc_path *path1 = of_icc_get(dev->dev, "mdp1-mem");
> >
> > + if (dpu_supports_bw_scaling(dev))
> > + return 0;
> > +
> >   if (IS_ERR_OR_NULL(path0))
> >   return PTR_ERR_OR_ZERO(path0);
> >
> > @@ -276,11 +279,9 @@ int dpu_mdss_init(struct drm_device *dev)
> >
> >   DRM_DEBUG("mapped mdss address space @%pK\n", dpu_mdss->mmio);
> >
> > - if (!of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss")) {
> > - ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
> > - if (ret)
> > - return ret;
> > - }
> > + ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
> > + if (ret)
> > + return ret;
> >
> >   mp = _mdss->mp;
> >   ret = msm_dss_parse_clock(pdev, mp);
> >
>
>
> --
> With best wishes
> Dmitry
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 4/4] drm/msm: Improved debugfs gem stats

2021-03-31 Thread Rob Clark
From: Rob Clark 

The last patch lost the breakdown of active vs inactive GEM objects in
$debugfs/gem.  But we can add some better stats to summarize not just
active vs inactive, but also purgable/purged to make up for that.

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/msm/msm_fb.c  |  3 ++-
 drivers/gpu/drm/msm/msm_gem.c | 31 ---
 drivers/gpu/drm/msm/msm_gem.h | 11 ++-
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index d42f0665359a..887172a10c9a 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -33,6 +33,7 @@ static const struct drm_framebuffer_funcs 
msm_framebuffer_funcs = {
 #ifdef CONFIG_DEBUG_FS
 void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m)
 {
+   struct msm_gem_stats stats = {{0}};
int i, n = fb->format->num_planes;
 
seq_printf(m, "fb: %dx%d@%4.4s (%2d, ID:%d)\n",
@@ -42,7 +43,7 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, 
struct seq_file *m)
for (i = 0; i < n; i++) {
seq_printf(m, "   %d: offset=%d pitch=%d, obj: ",
i, fb->offsets[i], fb->pitches[i]);
-   msm_gem_describe(fb->obj[i], m);
+   msm_gem_describe(fb->obj[i], m, );
}
 }
 #endif
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index c184ea68a6d0..a933ca5dc6df 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -873,7 +873,8 @@ static void describe_fence(struct dma_fence *fence, const 
char *type,
fence->seqno);
 }
 
-void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
+void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
+   struct msm_gem_stats *stats)
 {
struct msm_gem_object *msm_obj = to_msm_bo(obj);
struct dma_resv *robj = obj->resv;
@@ -885,11 +886,23 @@ void msm_gem_describe(struct drm_gem_object *obj, struct 
seq_file *m)
 
msm_gem_lock(obj);
 
+   stats->all.count++;
+   stats->all.size += obj->size;
+
+   if (is_active(msm_obj)) {
+   stats->active.count++;
+   stats->active.size += obj->size;
+   }
+
switch (msm_obj->madv) {
case __MSM_MADV_PURGED:
+   stats->purged.count++;
+   stats->purged.size += obj->size;
madv = " purged";
break;
case MSM_MADV_DONTNEED:
+   stats->purgable.count++;
+   stats->purgable.size += obj->size;
madv = " purgeable";
break;
case MSM_MADV_WILLNEED:
@@ -956,20 +969,24 @@ void msm_gem_describe(struct drm_gem_object *obj, struct 
seq_file *m)
 
 void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
 {
+   struct msm_gem_stats stats = {{0}};
struct msm_gem_object *msm_obj;
-   int count = 0;
-   size_t size = 0;
 
seq_puts(m, "   flags   id ref  offset   kaddrsize 
madv  name\n");
list_for_each_entry(msm_obj, list, node) {
struct drm_gem_object *obj = _obj->base;
seq_puts(m, "   ");
-   msm_gem_describe(obj, m);
-   count++;
-   size += obj->size;
+   msm_gem_describe(obj, m, );
}
 
-   seq_printf(m, "Total %d objects, %zu bytes\n", count, size);
+   seq_printf(m, "Total:%4d objects, %9zu bytes\n",
+   stats.all.count, stats.all.size);
+   seq_printf(m, "Active:   %4d objects, %9zu bytes\n",
+   stats.active.count, stats.active.size);
+   seq_printf(m, "Purgable: %4d objects, %9zu bytes\n",
+   stats.purgable.count, stats.purgable.size);
+   seq_printf(m, "Purged:   %4d objects, %9zu bytes\n",
+   stats.purged.count, stats.purged.size);
 }
 #endif
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 49956196025e..43510ac070dd 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -158,7 +158,16 @@ struct drm_gem_object *msm_gem_import(struct drm_device 
*dev,
 __printf(2, 3)
 void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...);
 #ifdef CONFIG_DEBUG_FS
-void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
+
+struct msm_gem_stats {
+   struct {
+   unsigned count;
+   size_t size;
+   } all, active, purgable, purged;
+};
+
+void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
+   struct msm_gem_stats *stats);
 void msm_gem_describe_objects(struct list_head *list, struct seq_file *m);
 #endif
 
-- 
2.30.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org

[PATCH 1/4] drm/msm: Remove unused freed llist node

2021-03-31 Thread Rob Clark
From: Rob Clark 

Unused since c951a9b284b907604759628d273901064c60d09f

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/msm/msm_gem.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index b3a0a880cbab..7a9107cf1818 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -78,8 +78,6 @@ struct msm_gem_object {
 
struct list_head vmas;/* list of msm_gem_vma */
 
-   struct llist_node freed;
-
/* For physically contiguous buffers.  Used when we don't have
 * an IOMMU.  Also used for stolen/splashscreen buffer.
 */
-- 
2.30.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/4] drm/msm: Avoid mutex in shrinker_count()

2021-03-31 Thread Rob Clark
From: Rob Clark 

When the system is under heavy memory pressure, we can end up with lots
of concurrent calls into the shrinker.  Keeping a running tab on what we
can shrink avoids grabbing a lock in shrinker->count(), and avoids
shrinker->scan() getting called when not profitable.

Also, we can keep purged objects in their own list to avoid re-traversing
them to help cut down time in the critical section further.

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/msm/msm_drv.c  |  1 +
 drivers/gpu/drm/msm/msm_drv.h  |  2 ++
 drivers/gpu/drm/msm/msm_gem.c  | 16 +++--
 drivers/gpu/drm/msm/msm_gem.h  | 32 ++
 drivers/gpu/drm/msm/msm_gem_shrinker.c | 17 +-
 5 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 4f9fa0189a07..3462b0ea14c6 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -476,6 +476,7 @@ static int msm_drm_init(struct device *dev, const struct 
drm_driver *drv)
 
INIT_LIST_HEAD(>inactive_willneed);
INIT_LIST_HEAD(>inactive_dontneed);
+   INIT_LIST_HEAD(>inactive_purged);
mutex_init(>mm_lock);
 
/* Teach lockdep about lock ordering wrt. shrinker: */
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index a1264cfcac5e..3ead5755f695 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -188,6 +188,8 @@ struct msm_drm_private {
 */
struct list_head inactive_willneed;  /* inactive + !shrinkable */
struct list_head inactive_dontneed;  /* inactive +  shrinkable */
+   struct list_head inactive_purged;/* inactive +  purged */
+   int shrinkable_count;/* write access under mm_lock */
struct mutex mm_lock;
 
struct workqueue_struct *wq;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 9d10739c4eb2..74a92eedc992 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -719,6 +719,7 @@ void msm_gem_purge(struct drm_gem_object *obj)
put_iova_vmas(obj);
 
msm_obj->madv = __MSM_MADV_PURGED;
+   mark_unpurgable(msm_obj);
 
drm_vma_node_unmap(>vma_node, dev->anon_inode->i_mapping);
drm_gem_free_mmap_offset(obj);
@@ -790,6 +791,7 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct 
msm_gpu *gpu)
might_sleep();
WARN_ON(!msm_gem_is_locked(obj));
WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
+   WARN_ON(msm_obj->dontneed);
 
if (msm_obj->active_count++ == 0) {
mutex_lock(>mm_lock);
@@ -818,11 +820,19 @@ static void update_inactive(struct msm_gem_object 
*msm_obj)
mutex_lock(>mm_lock);
WARN_ON(msm_obj->active_count != 0);
 
+   if (msm_obj->dontneed)
+   mark_unpurgable(msm_obj);
+
list_del_init(_obj->mm_list);
-   if (msm_obj->madv == MSM_MADV_WILLNEED)
+   if (msm_obj->madv == MSM_MADV_WILLNEED) {
list_add_tail(_obj->mm_list, >inactive_willneed);
-   else
+   } else if (msm_obj->madv == MSM_MADV_DONTNEED) {
list_add_tail(_obj->mm_list, >inactive_dontneed);
+   mark_purgable(msm_obj);
+   } else {
+   WARN_ON(msm_obj->madv != __MSM_MADV_PURGED);
+   list_add_tail(_obj->mm_list, >inactive_purged);
+   }
 
mutex_unlock(>mm_lock);
 }
@@ -971,6 +981,8 @@ void msm_gem_free_object(struct drm_gem_object *obj)
struct msm_drm_private *priv = dev->dev_private;
 
mutex_lock(>mm_lock);
+   if (msm_obj->dontneed)
+   mark_unpurgable(msm_obj);
list_del(_obj->mm_list);
mutex_unlock(>mm_lock);
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 7a9107cf1818..0feabae75d3d 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -50,6 +50,11 @@ struct msm_gem_object {
 */
uint8_t madv;
 
+   /**
+* Is object on inactive_dontneed list (ie. counted in 
priv->shrinkable_count)?
+*/
+   bool dontneed : 1;
+
/**
 * count of active vmap'ing
 */
@@ -198,6 +203,33 @@ static inline bool is_vunmapable(struct msm_gem_object 
*msm_obj)
return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
 }
 
+static inline void mark_purgable(struct msm_gem_object *msm_obj)
+{
+   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
+
+   WARN_ON(!mutex_is_locked(>mm_lock));
+
+   if (WARN_ON(msm_obj->dontneed))
+   return;
+
+   priv->shrinkable_count += msm_obj->base.size >> PAGE_SHIFT;
+   msm_obj->dontneed = true;
+}
+
+static inline void mark_unpurgable(struct msm_gem_object *msm_obj)
+{
+   struct msm_drm_private *priv = msm_obj->base.dev->dev_private;
+
+   WARN_ON(!mutex_is_locked(>mm_lock));
+
+   if 

[PATCH 0/4] drm/msm: Shrinker (and related) fixes

2021-03-31 Thread Rob Clark
From: Rob Clark 

I've been spending some time looking into how things behave under high
memory pressure.  The first patch is a random cleanup I noticed along
the way.  The second improves the situation significantly when we are
getting shrinker called from many threads in parallel.  And the last
two are $debugfs/gem fixes I needed so I could monitor the state of GEM
objects (ie. how many are active/purgable/purged) while triggering high
memory pressure.

We could probably go a bit further with dropping the mm_lock in the
shrinker->scan() loop, but this is already a pretty big improvement.
The next step is probably actually to add support to unpin/evict
inactive objects.  (We are part way there since we have already de-
coupled the iova lifetime from the pages lifetime, but there are a
few sharp corners to work through.)

Rob Clark (4):
  drm/msm: Remove unused freed llist node
  drm/msm: Avoid mutex in shrinker_count()
  drm/msm: Fix debugfs deadlock
  drm/msm: Improved debugfs gem stats

 drivers/gpu/drm/msm/msm_debugfs.c  | 14 ++
 drivers/gpu/drm/msm/msm_drv.c  |  4 ++
 drivers/gpu/drm/msm/msm_drv.h  | 10 -
 drivers/gpu/drm/msm/msm_fb.c   |  3 +-
 drivers/gpu/drm/msm/msm_gem.c  | 61 +-
 drivers/gpu/drm/msm/msm_gem.h  | 58 +---
 drivers/gpu/drm/msm/msm_gem_shrinker.c | 17 +--
 7 files changed, 122 insertions(+), 45 deletions(-)

-- 
2.30.2

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 3/4] drm/msm: Fix debugfs deadlock

2021-03-31 Thread Rob Clark
From: Rob Clark 

In normal cases the gem obj lock is acquired first before mm_lock.  The
exception is iterating the various object lists.  In the shrinker path,
deadlock is avoided by using msm_gem_trylock() and skipping over objects
that cannot be locked.  But for debugfs the straightforward thing is to
split things out into a separate list of all objects protected by it's
own lock.

Fixes: d984457b31c4 ("drm/msm: Add priv->mm_lock to protect active/inactive 
lists")
Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/msm/msm_debugfs.c | 14 +++---
 drivers/gpu/drm/msm/msm_drv.c |  3 +++
 drivers/gpu/drm/msm/msm_drv.h |  8 +++-
 drivers/gpu/drm/msm/msm_gem.c | 14 +-
 drivers/gpu/drm/msm/msm_gem.h | 13 ++---
 5 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_debugfs.c 
b/drivers/gpu/drm/msm/msm_debugfs.c
index 85ad0babc326..d611cc8e54a4 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -111,23 +111,15 @@ static const struct file_operations msm_gpu_fops = {
 static int msm_gem_show(struct drm_device *dev, struct seq_file *m)
 {
struct msm_drm_private *priv = dev->dev_private;
-   struct msm_gpu *gpu = priv->gpu;
int ret;
 
-   ret = mutex_lock_interruptible(>mm_lock);
+   ret = mutex_lock_interruptible(>obj_lock);
if (ret)
return ret;
 
-   if (gpu) {
-   seq_printf(m, "Active Objects (%s):\n", gpu->name);
-   msm_gem_describe_objects(>active_list, m);
-   }
-
-   seq_printf(m, "Inactive Objects:\n");
-   msm_gem_describe_objects(>inactive_dontneed, m);
-   msm_gem_describe_objects(>inactive_willneed, m);
+   msm_gem_describe_objects(>objects, m);
 
-   mutex_unlock(>mm_lock);
+   mutex_unlock(>obj_lock);
 
return 0;
 }
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 3462b0ea14c6..1ef1cd0cc714 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -474,6 +474,9 @@ static int msm_drm_init(struct device *dev, const struct 
drm_driver *drv)
 
priv->wq = alloc_ordered_workqueue("msm", 0);
 
+   INIT_LIST_HEAD(>objects);
+   mutex_init(>obj_lock);
+
INIT_LIST_HEAD(>inactive_willneed);
INIT_LIST_HEAD(>inactive_dontneed);
INIT_LIST_HEAD(>inactive_purged);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 3ead5755f695..d69f4263bd4e 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -174,7 +174,13 @@ struct msm_drm_private {
struct msm_rd_state *hangrd;   /* debugfs to dump hanging submits */
struct msm_perf_state *perf;
 
-   /*
+   /**
+* List of all GEM objects (mainly for debugfs, protected by obj_lock
+*/
+   struct list_head objects;
+   struct mutex obj_lock;
+
+   /**
 * Lists of inactive GEM objects.  Every bo is either in one of the
 * inactive lists (depending on whether or not it is shrinkable) or
 * gpu->active_list (for the gpu it is active on[1])
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 74a92eedc992..c184ea68a6d0 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -961,7 +961,7 @@ void msm_gem_describe_objects(struct list_head *list, 
struct seq_file *m)
size_t size = 0;
 
seq_puts(m, "   flags   id ref  offset   kaddrsize 
madv  name\n");
-   list_for_each_entry(msm_obj, list, mm_list) {
+   list_for_each_entry(msm_obj, list, node) {
struct drm_gem_object *obj = _obj->base;
seq_puts(m, "   ");
msm_gem_describe(obj, m);
@@ -980,6 +980,10 @@ void msm_gem_free_object(struct drm_gem_object *obj)
struct drm_device *dev = obj->dev;
struct msm_drm_private *priv = dev->dev_private;
 
+   mutex_lock(>obj_lock);
+   list_del(_obj->node);
+   mutex_unlock(>obj_lock);
+
mutex_lock(>mm_lock);
if (msm_obj->dontneed)
mark_unpurgable(msm_obj);
@@ -1170,6 +1174,10 @@ static struct drm_gem_object *_msm_gem_new(struct 
drm_device *dev,
list_add_tail(_obj->mm_list, >inactive_willneed);
mutex_unlock(>mm_lock);
 
+   mutex_lock(>obj_lock);
+   list_add_tail(_obj->node, >objects);
+   mutex_unlock(>obj_lock);
+
return obj;
 
 fail:
@@ -1240,6 +1248,10 @@ struct drm_gem_object *msm_gem_import(struct drm_device 
*dev,
list_add_tail(_obj->mm_list, >inactive_willneed);
mutex_unlock(>mm_lock);
 
+   mutex_lock(>obj_lock);
+   list_add_tail(_obj->node, >objects);
+   mutex_unlock(>obj_lock);
+
return obj;
 
 fail:
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 0feabae75d3d..49956196025e 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ 

Re: [PATCH v5 2/5] phy: Add LVDS configuration options

2021-03-31 Thread Kishon Vijay Abraham I
Hi,

On 25/03/21 2:30 pm, Liu Ying wrote:
> This patch allows LVDS PHYs to be configured through
> the generic functions and through a custom structure
> added to the generic union.
> 
> The parameters added here are based on common LVDS PHY
> implementation practices.  The set of parameters
> should cover all potential users.
> 
> Cc: Kishon Vijay Abraham I 
> Cc: Vinod Koul 
> Cc: NXP Linux Team 
> Signed-off-by: Liu Ying 
> ---
> v4->v5:
> * Align kernel-doc style to include/linux/phy/phy.h. (Vinod)
> * Trivial tweaks.
> * Drop Robert's R-b tag.
> 
> v3->v4:
> * Add Robert's R-b tag.
> 
> v2->v3:
> * No change.
> 
> v1->v2:
> * No change.
> 
>  include/linux/phy/phy-lvds.h | 32 
>  include/linux/phy/phy.h  |  4 
>  2 files changed, 36 insertions(+)
>  create mode 100644 include/linux/phy/phy-lvds.h
> 
> diff --git a/include/linux/phy/phy-lvds.h b/include/linux/phy/phy-lvds.h
> new file mode 100644
> index ..7a2f474
> --- /dev/null
> +++ b/include/linux/phy/phy-lvds.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright 2020 NXP
> + */
> +
> +#ifndef __PHY_LVDS_H_
> +#define __PHY_LVDS_H_
> +
> +/**
> + * struct phy_configure_opts_lvds - LVDS configuration set
> + * @bits_per_lane_and_dclk_cycle:Number of bits per data lane and
> + *   differential clock cycle.

phy_set_bus_width() instead?
> + * @differential_clk_rate:   Clock rate, in Hertz, of the LVDS
> + *   differential clock.

Please use clk API's to get rate.
> + * @lanes:   Number of active, consecutive,
> + *   data lanes, starting from lane 0,
> + *   used for the transmissions.
> + * @is_slave:Boolean, true if the phy is a 
> slave
> + *   which works together with a master
> + *   phy to support dual link transmission,
> + *   otherwise a regular phy or a master phy.

For parameters that are known at design time, it doesn't have to be
passed from consumer driver. So all these parameters do they really have
to be passed at runtime?

Thanks
Kishon
> + *
> + * This structure is used to represent the configuration state of a LVDS phy.
> + */
> +struct phy_configure_opts_lvds {
> + unsigned intbits_per_lane_and_dclk_cycle;
> + unsigned long   differential_clk_rate;
> + unsigned intlanes;
> + boolis_slave;
> +};
> +
> +#endif /* __PHY_LVDS_H_ */
> diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
> index e435bdb..d450b44 100644
> --- a/include/linux/phy/phy.h
> +++ b/include/linux/phy/phy.h
> @@ -17,6 +17,7 @@
>  #include 
>  
>  #include 
> +#include 
>  #include 
>  
>  struct phy;
> @@ -51,10 +52,13 @@ enum phy_mode {
>   *   the MIPI_DPHY phy mode.
>   * @dp:  Configuration set applicable for phys supporting
>   *   the DisplayPort protocol.
> + * @lvds:Configuration set applicable for phys supporting
> + *   the LVDS phy mode.
>   */
>  union phy_configure_opts {
>   struct phy_configure_opts_mipi_dphy mipi_dphy;
>   struct phy_configure_opts_dpdp;
> + struct phy_configure_opts_lvds  lvds;
>  };
>  
>  /**
> 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [pull] amdgpu, amdkfd, radeon drm-next-5.12

2021-03-31 Thread Dave Airlie
I think this is due to this pull, on arm32.

/home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_srv.c:
In function ‘dmub_srv_hw_init’:
/home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_srv.c:519:44:
warning: cast from pointer to integer of different size
[-Wpointer-to-int-cast]
  outbox0_rb_params.base_address = (void
*)((uint64_t)(tracebuff_fb->cpu_addr) + TRACE_BUFFER_ENTRY_OFFSET);
^
/home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_srv.c:519:35:
warning: cast to pointer from integer of different size
[-Wint-to-pointer-cast]
  outbox0_rb_params.base_address = (void
*)((uint64_t)(tracebuff_fb->cpu_addr) + TRACE_BUFFER_ENTRY_OFFSET);

Dave.

On Sat, 27 Mar 2021 at 05:16, Zhuo, Qingqing  wrote:
>
> [AMD Public Use]
>
> On Thu, Feb 18, 2021 at 11:15 PM Alex Deucher  wrote:
> >>
> >> Hi Dave, Daniel,
> >>
> >> Fixes for 5.12.
> >>
> >> The following changes since commit 
> >> 4c3a3292730c56591472717d8c5c0faf74f6c6bb:
> >>
> >>   drm/amd/display: fix unused variable warning (2021-02-05 09:49:44
> >> +1000)
> >>
> >> are available in the Git repository at:
> >>
> >>
> >> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitl
> >> ab.freedesktop.org%2Fagd5f%2Flinux.gitdata=04%7C01%7Cqingqing.zhu
> >> o%40amd.com%7Cce0d1ee6a18b4a95366008d8f082048e%7C3dd8961fe4884e608e11a
> >> 82d994e183d%7C0%7C0%7C637523789263486288%7CUnknown%7CTWFpbGZsb3d8eyJWI
> >> joiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000
> >> mp;sdata=Ig3OkPN0X8OtCOHDJqV%2FZSEOsL7gPs8OMh9sXDniR2w%3Dreserved
> >> =0 tags/amd-drm-next-5.12-2021-02-18
> >>
> >> for you to fetch changes up to 6e80fb8ab04f6c4f377e2fd422bdd1855beb7371:
> >>
> >>   drm/amdgpu: Set reference clock to 100Mhz on Renoir (v2) (2021-02-18
> >> 16:43:09 -0500)
>
> > Pulled into drm-next, with some conflicts, please double-check.
>
> > I also spotted
>
> > commit ea3b4242bc9ca197762119382b37e125815bd67f
> > Author: Qingqing Zhuo 
> > Date:   Tue Feb 9 16:36:41 2021 -0500
>
> >   drm/amd/display: Fix system hang after multiple hotplugs (v3)
>
> > I think it would be good if that could use the drm_vblank_work stuff from 
> > Lyude instead of hand-rolling your own.
> > -Daniel
>
> Hi Daniel,
>
> Thank you for the suggestion! I need to look into further and will do so as 
> soon as I have bandwidth.
>
> Thanks,
> Lillian
>
> >>
> >> 
> >> amd-drm-next-5.12-2021-02-18:
> >>
> >> amdgpu:
> >> - Prefer Bhawan's unused variable fix
> >> - Fixes for high priority queues on gfx8,9
> >> - swSMU fixes for sienna cichlid
> >> - swSMU fixes for renoir
> >> - mmhub client id fixes for arcturus
> >> - SMUIO fixes for navi family
> >> - swSMU fixes for vangogh
> >> - GPU reset cleanup
> >> - Display fixes
> >> - GFX harvesting fix for sienna cichlid
> >> - Fix reference clock on Renoir
> >> - Misc fixes and cleanups
> >>
> >> amdkfd:
> >> - Fix for unique id query
> >> - Fix recursive lock warnings
> >>
> >> radeon:
> >> - Remove confusing VCE messages on Oland
> >>
> >> 
> >> Alex Deucher (16):
> >>   Revert "drm/amd/display: fix unused variable warning"
> >>   drm/amdgpu/smu12: fix power reporting on renoir
> >>   drm/amdgpu/gmc9: fix mmhub client mapping for arcturus
> >>   drm/amdgpu/si: minor clean up of reset code
> >>   drm/amdgpu/cik: minor clean up of reset code
> >>   drm/amdgpu/vi: minor clean up of reset code
> >>   drm/amdgpu: add generic pci reset as an option
> >>   drm/amdgpu/si: add PCI reset support
> >>   drm/amdgpu/soc15: add PCI reset support
> >>   drm/amdgpu/nv: add PCI reset support
> >>   drm/amdgpu: drop extra drm_kms_helper_poll_enable/disable calls
> >>   drm/amdgpu: use runpm flag rather than fbcon for kfd runtime suspend 
> >> (v2)
> >>   drm/amdgpu: reset runpm flag if device suspend fails
> >>   Revert "drm/amd/display: Update NV1x SR latency values"
> >>   drm/radeon: OLAND boards don't have VCE
> >>   drm/amdgpu: Set reference clock to 100Mhz on Renoir (v2)
> >>
> >> Anthony Koo (1):
> >>   drm/amd/display: [FW Promotion] Release 0.0.51
> >>
> >> Aric Cyr (1):
> >>   drm/amd/display: 3.2.122
> >>
> >> Bhawanpreet Lakha (1):
> >>   drm/amd/display: Fix unused variable warning
> >>
> >> Dale Zhao (1):
> >>   drm/amd/display: fix type mismatch error for return variable
> >>
> >> Derek Lai (1):
> >>   drm/amd/display: Add DIG_CLOCK_PATTERN in the transmitter
> >> control
> >>
> >> Eric Yang (1):
> >>   drm/amd/display: move edp sink present detection to hw init
> >>
> >> Fangzhi Zuo (1):
> >>   drm/amd/display: Add return code instead of boolean for future
> >> use
> >>
> >> Felix Kuehling (1):
> >>   drm/amdkfd: Fix recursive lock warnings
> >>
> >> Gustavo A. 

Re: [PATCH v5 1/2] dt-bindings: usb: add analogix,anx7688.yaml

2021-03-31 Thread Laurent Pinchart
On Tue, Mar 30, 2021 at 05:14:44PM +0200, Enric Balletbo i Serra wrote:
> On 30/3/21 15:35, Dafna Hirschfeld wrote:
> > On 05.03.21 16:19, Laurent Pinchart wrote:
> >> On Fri, Mar 05, 2021 at 04:14:03PM +0100, Dafna Hirschfeld wrote:
> >>> On 05.03.21 15:34, Laurent Pinchart wrote:
>  On Fri, Mar 05, 2021 at 01:43:50PM +0100, Dafna Hirschfeld wrote:
> > ANX7688 is a USB Type-C port controller with a MUX. It converts HDMI 
> > 2.0 to
> > DisplayPort 1.3 Ultra-HDi (4096x2160p60).
> > The integrated crosspoint switch (the MUX) supports USB 3.1 data 
> > transfer
> > along with the DisplayPort Alternate Mode signaling over USB Type-C.
> > Additionally, an on-chip microcontroller (OCM) is available to manage 
> > the
> > signal switching, Channel Configuration (CC) detection, USB Power
> > Delivery (USB-PD), Vendor Defined Message (VDM) protocol support and 
> > other
> > functions as defined in the USB TypeC and USB Power Delivery
> > specifications.
> >
> > ANX7688 is found on Acer Chromebook R13 (elm) and on
> > Pine64 PinePhone.
> >
> > Signed-off-by: Dafna Hirschfeld 
> > ---
> >    .../bindings/usb/analogix,anx7688.yaml    | 177 
> > ++
> >    1 file changed, 177 insertions(+)
> >    create mode 100644
> > Documentation/devicetree/bindings/usb/analogix,anx7688.yaml
> >
> > diff --git a/Documentation/devicetree/bindings/usb/analogix,anx7688.yaml
> > b/Documentation/devicetree/bindings/usb/analogix,anx7688.yaml
> > new file mode 100644
> > index ..6c4dd6b4b28b
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/usb/analogix,anx7688.yaml
> > @@ -0,0 +1,177 @@
> > +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> > +%YAML 1.2
> > +---
> > +$id: http://devicetree.org/schemas/usb/analogix,anx7688.yaml#
> > +$schema: http://devicetree.org/meta-schemas/core.yaml#
> > +
> > +title: Analogix ANX7688 Type-C Port Controller with HDMI to DP 
> > conversion
> > +
> > +maintainers:
> > +  - Nicolas Boichat 
> > +  - Enric Balletbo i Serra 
> > +
> > +description: |
> > +  ANX7688 is a USB Type-C port controller with a MUX. It converts HDMI 
> > 2.0 to
> > +  DisplayPort 1.3 Ultra-HDi (4096x2160p60).
> > +  The integrated crosspoint switch (the MUX) supports USB 3.1 data
> > transfer along with
> > +  the DisplayPort Alternate Mode signaling over USB Type-C. 
> > Additionally,
> > +  an on-chip microcontroller (OCM) is available to manage the signal
> > switching,
> > +  Channel Configuration (CC) detection, USB Power Delivery (USB-PD), 
> > Vendor
> > +  Defined Message (VDM) protocol support and other functions as 
> > defined in
> > the
> > +  USB TypeC and USB Power Delivery specifications.
> > +
> > +
> 
>  Extra blank line ?
> 
> > +properties:
> > +  compatible:
> > +    const: analogix,anx7688
> > +
> > +  reg:
> > +    maxItems: 1
> > +
> > +  avdd33-supply:
> > +    description: 3.3V Analog core supply voltage.
> > +
> > +  dvdd18-supply:
> > +    description: 1.8V Digital I/O supply voltage.
> > +
> > +  avdd18-supply:
> > +    description: 1.8V Analog core power supply voltage.
> > +
> > +  avdd10-supply:
> > +    description: 1.0V Analog core power supply voltage.
> > +
> > +  dvdd10-supply:
> > +    description: 1.0V Digital core supply voltage.
> > +
> 
>  That's lots of supplies. If there's a reasonable chance that some of
>  them will always be driven by the same regulator (especially if the
>  ANX7688 documentation requires that), then they could be grouped. For
>  instance dvdd18-supply and avdd18-supply could be grouped into
>  vdd18-supply. It would still allow us to extend the bindings in a
>  backward compatible way later if a system uses different regulators. You
>  have more information about the hardware than I do, so it's your call.
> > 
> > Can you explain what do you mean by 'grouped' ?
> > Do you mean that instead of having two properties dvdd18-supply and 
> > avdd18-supply
> > I have only one property vdd18-supply?
> 
> You can simplify all this with vdd33, vdd18 vdd10. For the Chromebook case all
> the analogic and digital part are the same regulator just filtered. That's a
> common configuration and if there is some hardware that needs it we can extend
> later.

That's the idea, yes. If in a typical use case multiple supplies are
provided by a single regulator (for some devices that datasheet strongly
recommends that, or event mandates it), then it makes sense to group
those supplies in a single DT supply property. It can always be extended
later indeed, without any backward compatibility issue.

> > +  hdmi5v-supply:
> > +    description: 5V 

Re: [PATCH 2/4] drm/amd/display: Add FPU event trace

2021-03-31 Thread kernel test robot
Hi Rodrigo,

I love your patch! Yet something to improve:

[auto build test ERROR on next-20210331]
[cannot apply to linus/master v5.12-rc5 v5.12-rc4 v5.12-rc3 v5.12-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
base:7a43c78d0573e00456b033e2b9a895b89464
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/5859110d0579f7ee57ca1b1840c3960492a9c0c0
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
git checkout 5859110d0579f7ee57ca1b1840c3960492a9c0c0
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:29:10: fatal error: 
>> asm/fpu/api.h: No such file or directory
  29 | #include 
 |  ^~~
   compilation terminated.


vim +29 drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c

28  
  > 29  #include 
30  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] fix NULL pointer deference crash

2021-03-31 Thread Dan Carpenter
Hi Hassan,

url:
https://github.com/0day-ci/linux/commits/Hassan-Shahbazi/fix-NULL-pointer-deference-crash/20210401-004543
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: x86_64-randconfig-m001-20210330 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 
Reported-by: Dan Carpenter 

New smatch warnings:
drivers/video/fbdev/core/fbcon.c:1336 fbcon_cursor() warn: variable 
dereferenced before check 'ops' (see line 1324)

Old smatch warnings:
drivers/video/fbdev/core/fbcon.c:3028 fbcon_get_con2fb_map_ioctl() warn: 
potential spectre issue 'con2fb_map' [r]

vim +/ops +1336 drivers/video/fbdev/core/fbcon.c

^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1318  static void fbcon_cursor(struct vc_data *vc, int mode)
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1319  {
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1320struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1321struct fbcon_ops *ops = info->fbcon_par;
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1322int c = scr_readw((u16 *) vc->vc_pos);
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1323  
2a17d7e80f1df44 drivers/video/console/fbcon.cScot Doyle 2015-08-04 
@1324ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
2a17d7e80f1df44 drivers/video/console/fbcon.cScot Doyle 2015-08-04  
1325  
d1e2306681ad3cb drivers/video/console/fbcon.cMichal Januszewski 2007-05-08  
1326if (fbcon_is_inactive(vc, info) || vc->vc_deccm != 1)
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1327return;
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1328  
c0e4b3ad67997a6 drivers/video/fbdev/core/fbcon.c Jiri Slaby 2020-06-15  
1329if (vc->vc_cursor_type & CUR_SW)
acba9cd01974353 drivers/video/console/fbcon.cAntonino A. Daplas 2007-07-17  
1330fbcon_del_cursor_timer(info);
a5edce421848442 drivers/video/console/fbcon.cThierry Reding 2015-05-21  
1331else
acba9cd01974353 drivers/video/console/fbcon.cAntonino A. Daplas 2007-07-17  
1332fbcon_add_cursor_timer(info);
acba9cd01974353 drivers/video/console/fbcon.cAntonino A. Daplas 2007-07-17  
1333  
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1334ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1;

^
Dereferenced

^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1335  
1d73453653c6d4f drivers/video/fbdev/core/fbcon.c Hassan Shahbazi2021-03-31 
@1336if (ops && ops->cursor)

^^^
Checked too late

06a0df4d1b8b13b drivers/video/fbdev/core/fbcon.c Linus Torvalds 2020-09-08  
1337ops->cursor(vc, info, mode, get_color(vc, info, c, 1),
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1338get_color(vc, info, c, 0));
^1da177e4c3f415 drivers/video/console/fbcon.cLinus Torvalds 2005-04-16  
1339  }

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 2/4] drm/amd/display: Add FPU event trace

2021-03-31 Thread kernel test robot
Hi Rodrigo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20210331]
[cannot apply to linus/master v5.12-rc5 v5.12-rc4 v5.12-rc3 v5.12-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
base:7a43c78d0573e00456b033e2b9a895b89464
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# 
https://github.com/0day-ci/linux/commit/5859110d0579f7ee57ca1b1840c3960492a9c0c0
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
git checkout 5859110d0579f7ee57ca1b1840c3960492a9c0c0
# save the attached .config to linux build tree
make W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:41:6: warning: no 
>> previous prototype for 'dc_fpu_begin' [-Wmissing-prototypes]
  41 | void dc_fpu_begin(const char *function_name, const int line)
 |  ^~~~
>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:57:6: warning: no 
>> previous prototype for 'dc_fpu_end' [-Wmissing-prototypes]
  57 | void dc_fpu_end(const char *function_name, const int line)
 |  ^~


vim +/dc_fpu_begin +41 drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c

30  
31  /**
32   * dc_fpu_begin - Enables FPU protection
33   * @function_name: A string containing the function name for debug 
purposes
34   * @line: A-line number where DC_FP_START was invoked for debug purpose
35   *
36   * This function is responsible for managing the use of 
kernel_fpu_begin() with
37   * the advantage of providing an event trace for debugging.
38   *
39   * Note: Do not call this function directly; always use DC_FP_START().
40   */
  > 41  void dc_fpu_begin(const char *function_name, const int line)
42  {
43  TRACE_DCN_FPU(true, function_name, line);
44  kernel_fpu_begin();
45  }
46  
47  /**
48   * dc_fpu_end - Disable FPU protection
49   * @function_name: A string containing the function name for debug 
purposes
50   * @line: A-line number where DC_FP_END was invoked for debug purpose
51   *
52   * This function is responsible for managing the use of 
kernel_fpu_end() with
53   * the advantage of providing an event trace for debugging.
54   *
55   * Note: Do not call this function directly; always use DC_FP_END().
56   */
  > 57  void dc_fpu_end(const char *function_name, const int line)

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [Freedreno] [PATCH v4 23/24] drm/msm/dsi: inline msm_dsi_phy_set_src_pll

2021-03-31 Thread abhinavk

On 2021-03-31 03:57, Dmitry Baryshkov wrote:

The src_truthtable config is not used for some of phys, which use other
means of configuring the master/slave usecases. Inline this function
with the goal of removing src_pll_id argument in the next commit.

Signed-off-by: Dmitry Baryshkov 
Tested-by: Stephen Boyd  # on sc7180 lazor

Reviewed-by: Abhinav Kumar 

---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c   | 17 -
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.h   |  8 
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c  |  2 --
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c  | 13 +++--
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c  | 11 +++
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c  | 13 +++--
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c |  1 -
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c   |  2 --
 8 files changed, 21 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index 344887025720..93e81bb78d26 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -461,23 +461,6 @@ int msm_dsi_dphy_timing_calc_v4(struct
msm_dsi_dphy_timing *timing,
return 0;
 }

-void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 
reg,

-   u32 bit_mask)
-{
-   int phy_id = phy->id;
-   u32 val;
-
-   if ((phy_id >= DSI_MAX) || (pll_id >= DSI_MAX))
-   return;
-
-   val = dsi_phy_read(phy->base + reg);
-
-   if (phy->cfg->src_pll_truthtable[phy_id][pll_id])
-   dsi_phy_write(phy->base + reg, val | bit_mask);
-   else
-   dsi_phy_write(phy->base + reg, val & (~bit_mask));
-}
-
 static int dsi_phy_regulator_init(struct msm_dsi_phy *phy)
 {
struct regulator_bulk_data *s = phy->supplies;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 7748f8b5ea53..00ef01baaebd 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -33,12 +33,6 @@ struct msm_dsi_phy_cfg {
unsigned long   min_pll_rate;
unsigned long   max_pll_rate;

-   /*
-* Each cell {phy_id, pll_id} of the truth table indicates
-* if the source PLL selection bit should be set for each PHY.
-* Fill default H/W values in illegal cells, eg. cell {0, 1}.
-*/
-   bool src_pll_truthtable[DSI_MAX][DSI_MAX];
const resource_size_t io_start[DSI_MAX];
const int num_dsi_phy;
const int quirks;
@@ -121,7 +115,5 @@ int msm_dsi_dphy_timing_calc_v3(struct
msm_dsi_dphy_timing *timing,
struct msm_dsi_phy_clk_request *clk_req);
 int msm_dsi_dphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing,
struct msm_dsi_phy_clk_request *clk_req);
-void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 
reg,

-   u32 bit_mask);

 #endif /* __DSI_PHY_H__ */
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
index 655996cf8688..64b8b0efc1a4 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
@@ -921,7 +921,6 @@ static void dsi_10nm_phy_disable(struct msm_dsi_phy 
*phy)

 }

 const struct msm_dsi_phy_cfg dsi_phy_10nm_cfgs = {
-   .src_pll_truthtable = { {false, false}, {true, false} },
.has_phy_lane = true,
.reg_cfg = {
.num = 1,
@@ -943,7 +942,6 @@ const struct msm_dsi_phy_cfg dsi_phy_10nm_cfgs = {
 };

 const struct msm_dsi_phy_cfg dsi_phy_10nm_8998_cfgs = {
-   .src_pll_truthtable = { {false, false}, {true, false} },
.has_phy_lane = true,
.reg_cfg = {
.num = 1,
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
index 090d3e7a2212..9a2937589435 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
@@ -947,6 +947,7 @@ static int dsi_14nm_phy_enable(struct msm_dsi_phy
*phy, int src_pll_id,
int ret;
void __iomem *base = phy->base;
void __iomem *lane_base = phy->lane_base;
+   u32 glbl_test_ctrl;

if (msm_dsi_dphy_timing_calc_v2(timing, clk_req)) {
DRM_DEV_ERROR(>pdev->dev,
@@ -994,10 +995,12 @@ static int dsi_14nm_phy_enable(struct
msm_dsi_phy *phy, int src_pll_id,
udelay(100);
dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_CTRL_1, 0x00);

-   msm_dsi_phy_set_src_pll(phy, src_pll_id,
-   REG_DSI_14nm_PHY_CMN_GLBL_TEST_CTRL,
-   DSI_14nm_PHY_CMN_GLBL_TEST_CTRL_BITCLK_HS_SEL);
-
+	glbl_test_ctrl = dsi_phy_read(base + 
REG_DSI_14nm_PHY_CMN_GLBL_TEST_CTRL);

+   if (phy->id == DSI_1 && src_pll_id == DSI_0)
+   glbl_test_ctrl |= 

Re: [Freedreno] [PATCH v4 24/24] drm/msm/dsi: stop passing src_pll_id to the phy_enable call

2021-03-31 Thread abhinavk

On 2021-03-31 03:57, Dmitry Baryshkov wrote:
Phy driver already knows the source PLL id based on the set usecase 
and

the current PLL id. Stop passing it to the phy_enable call. As a
reminder, dsi manager will always use DSI 0 as a clock master in a 
slave

mode, so PLL 0 is always a clocksource for DSI 0 and it is always a
clocksource for DSI 1 too unless DSI 1 is used in the standalone mode.

Signed-off-by: Dmitry Baryshkov 
Tested-by: Stephen Boyd  # on sc7180 lazor

Reviewed-by: Abhinav Kumar 

---
 drivers/gpu/drm/msm/dsi/dsi.h   |  2 +-
 drivers/gpu/drm/msm/dsi/dsi_manager.c   | 11 +--
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c   |  4 ++--
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.h   |  2 +-
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c  |  2 +-
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c  |  4 ++--
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c  |  4 ++--
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c  |  4 ++--
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c |  2 +-
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c   |  2 +-
 10 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi.h 
b/drivers/gpu/drm/msm/dsi/dsi.h

index 7f99e12efd52..7abfeab08165 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.h
@@ -162,7 +162,7 @@ struct msm_dsi_phy_clk_request {

 void msm_dsi_phy_driver_register(void);
 void msm_dsi_phy_driver_unregister(void);
-int msm_dsi_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
+int msm_dsi_phy_enable(struct msm_dsi_phy *phy,
struct msm_dsi_phy_clk_request *clk_req);
 void msm_dsi_phy_disable(struct msm_dsi_phy *phy);
 void msm_dsi_phy_get_shared_timings(struct msm_dsi_phy *phy,
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c
b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index e116e5ff5d24..cd016576e8c5 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -114,7 +114,7 @@ static int dsi_mgr_setup_components(int id)
return ret;
 }

-static int enable_phy(struct msm_dsi *msm_dsi, int src_pll_id,
+static int enable_phy(struct msm_dsi *msm_dsi,
  struct msm_dsi_phy_shared_timings *shared_timings)
 {
struct msm_dsi_phy_clk_request clk_req;
@@ -123,7 +123,7 @@ static int enable_phy(struct msm_dsi *msm_dsi, int
src_pll_id,

msm_dsi_host_get_phy_clk_req(msm_dsi->host, _req, is_dual_dsi);

-   ret = msm_dsi_phy_enable(msm_dsi->phy, src_pll_id, _req);
+   ret = msm_dsi_phy_enable(msm_dsi->phy, _req);
msm_dsi_phy_get_shared_timings(msm_dsi->phy, shared_timings);

return ret;
@@ -136,7 +136,6 @@ dsi_mgr_phy_enable(int id,
struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
struct msm_dsi *mdsi = dsi_mgr_get_dsi(DSI_CLOCK_MASTER);
struct msm_dsi *sdsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE);
-   int src_pll_id = IS_DUAL_DSI() ? DSI_CLOCK_MASTER : id;
int ret;

/* In case of dual DSI, some registers in PHY1 have been programmed
@@ -149,11 +148,11 @@ dsi_mgr_phy_enable(int id,
msm_dsi_host_reset_phy(mdsi->host);
msm_dsi_host_reset_phy(sdsi->host);

-   ret = enable_phy(mdsi, src_pll_id,
+   ret = enable_phy(mdsi,
 _timings[DSI_CLOCK_MASTER]);
if (ret)
return ret;
-   ret = enable_phy(sdsi, src_pll_id,
+   ret = enable_phy(sdsi,
 _timings[DSI_CLOCK_SLAVE]);
if (ret) {
msm_dsi_phy_disable(mdsi->phy);
@@ -162,7 +161,7 @@ dsi_mgr_phy_enable(int id,
}
} else {
msm_dsi_host_reset_phy(msm_dsi->host);
-   ret = enable_phy(msm_dsi, src_pll_id, _timings[id]);
+   ret = enable_phy(msm_dsi, _timings[id]);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index 93e81bb78d26..f0a2ddf96a4b 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -753,7 +753,7 @@ void __exit msm_dsi_phy_driver_unregister(void)
platform_driver_unregister(_phy_platform_driver);
 }

-int msm_dsi_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
+int msm_dsi_phy_enable(struct msm_dsi_phy *phy,
struct msm_dsi_phy_clk_request *clk_req)
 {
struct device *dev = >pdev->dev;
@@ -776,7 +776,7 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy,
int src_pll_id,
goto reg_en_fail;
}

-   ret = phy->cfg->ops.enable(phy, src_pll_id, clk_req);
+   ret = phy->cfg->ops.enable(phy, clk_req);
if (ret) {
DRM_DEV_ERROR(dev, "%s: phy enable failed, 

Re: [PATCH] /msm/adreno: fix different address spaces warning

2021-03-31 Thread kernel test robot
Hi Bernard,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.12-rc5 next-20210331]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Bernard-Zhao/msm-adreno-fix-different-address-spaces-warning/20210331-212535
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: arm64-randconfig-r011-20210330 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 
3a6365a439ede4b7c65076bb42b1b7dbf72216b5)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm64 cross compiling tool for clang build
# apt-get install binutils-aarch64-linux-gnu
# 
https://github.com/0day-ci/linux/commit/ba5ad7c05994836bcb59fd6d7b5b70c8b553ea56
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Bernard-Zhao/msm-adreno-fix-different-address-spaces-warning/20210331-212535
git checkout ba5ad7c05994836bcb59fd6d7b5b70c8b553ea56
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:189:2: error: invalid operands 
>> to binary expression ('void *' and 'int')
   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
   
^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: note: expanded from 
macro 'cxdbg_write'
   msm_writel((val), (ptr) + ((offset) << 2))
   ^  ~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:190:2: error: invalid operands 
to binary expression ('void *' and 'int')
   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
   
^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: note: expanded from 
macro 'cxdbg_write'
   msm_writel((val), (ptr) + ((offset) << 2))
   ^  ~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:191:2: error: invalid operands 
to binary expression ('void *' and 'int')
   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
   
^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: note: expanded from 
macro 'cxdbg_write'
   msm_writel((val), (ptr) + ((offset) << 2))
   ^  ~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:192:2: error: invalid operands 
to binary expression ('void *' and 'int')
   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
   
^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: note: expanded from 
macro 'cxdbg_write'
   msm_writel((val), (ptr) + ((offset) << 2))
   ^  ~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:197:12: error: invalid operands 
to binary expression ('void *' and 'int')
   data[0] = cxdbg_read(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 
^
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:180:30: note: expanded from 
macro 'cxdbg_read'
   msm_readl((ptr) + ((offset) << 2))
   ^  ~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:198:12: error: invalid operands 
to binary expression ('void *' and 'int')
   data[1] = cxdbg_read(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 
^
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:180:30: note: expanded from 
macro 'cxdbg_read'
   msm_readl((ptr) + ((offset) << 2))
   ^  ~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:356:3: error: invalid operands 
to binary expression ('void *' and 'int')
   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
   
^

[PATCH v2 2/2] drm/panel: simple: Add support for EDT ETM0350G0DH6 panel

2021-03-31 Thread Yunus Bas
From: Stefan Riedmueller 

This patch adds support for the EDT ETM0350G0DH6 3.5" (320x240) lcd
panel to DRM simple panel driver.

Signed-off-by: Stefan Riedmueller 
Signed-off-by: Yunus Bas 
---
Changes in v2:
- Split dt-bindings into a separate patch
---
 drivers/gpu/drm/panel/panel-simple.c | 29 
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c 
b/drivers/gpu/drm/panel/panel-simple.c
index 283c17a75376..70c25f6e642b 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1847,6 +1847,32 @@ static const struct panel_desc edt_et035012dm6 = {
.bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
 };
 
+static const struct drm_display_mode edt_etm0350g0dh6_mode = {
+   .clock = 6520,
+   .hdisplay = 320,
+   .hsync_start = 320 + 20,
+   .hsync_end = 320 + 20 + 68,
+   .htotal = 320 + 20 + 68,
+   .vdisplay = 240,
+   .vsync_start = 240 + 4,
+   .vsync_end = 240 + 4 + 18,
+   .vtotal = 240 + 4 + 18,
+   .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc edt_etm0350g0dh6 = {
+   .modes = _etm0350g0dh6_mode,
+   .num_modes = 1,
+   .bpc = 6,
+   .size = {
+   .width = 70,
+   .height = 53,
+   },
+   .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+   .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+   .connector_type = DRM_MODE_CONNECTOR_DPI,
+};
+
 static const struct drm_display_mode edt_etm043080dh6gp_mode = {
.clock = 10870,
.hdisplay = 480,
@@ -4243,6 +4269,9 @@ static const struct of_device_id platform_of_match[] = {
}, {
.compatible = "edt,et035012dm6",
.data = _et035012dm6,
+   }, {
+   .compatible = "edt,etm0350g0dh6",
+   .data = _etm0350g0dh6,
}, {
.compatible = "edt,etm043080dh6gp",
.data = _etm043080dh6gp,
-- 
2.30.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2 1/2] drm/panel: simple: Add support for EDT ETMV570G2DHU panel

2021-03-31 Thread Yunus Bas
From: Stefan Riedmueller 

This patch adds support for the EDT ETMV570G2DHU 5.7" (640x480) lcd panel
to DRM simple panel driver.

Signed-off-by: Stefan Riedmueller 
Signed-off-by: Yunus Bas 
---
Changes in v2:
- Split dt-bindings into a separate patch
---
 drivers/gpu/drm/panel/panel-simple.c | 29 
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c 
b/drivers/gpu/drm/panel/panel-simple.c
index 4e2dad314c79..283c17a75376 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1926,6 +1926,32 @@ static const struct panel_desc edt_et057090dhu = {
.connector_type = DRM_MODE_CONNECTOR_DPI,
 };
 
+static const struct drm_display_mode edt_etmv570g2dhu_mode = {
+   .clock = 25175,
+   .hdisplay = 640,
+   .hsync_start = 640,
+   .hsync_end = 640 + 16,
+   .htotal = 640 + 16 + 30 + 114,
+   .vdisplay = 480,
+   .vsync_start = 480 + 10,
+   .vsync_end = 480 + 10 + 3,
+   .vtotal = 480 + 10 + 3 + 35,
+   .flags = DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_PHSYNC,
+};
+
+static const struct panel_desc edt_etmv570g2dhu = {
+   .modes = _etmv570g2dhu_mode,
+   .num_modes = 1,
+   .bpc = 6,
+   .size = {
+   .width = 115,
+   .height = 86,
+   },
+   .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+   .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+   .connector_type = DRM_MODE_CONNECTOR_DPI,
+};
+
 static const struct drm_display_mode edt_etm0700g0dh6_mode = {
.clock = 33260,
.hdisplay = 800,
@@ -4226,6 +4252,9 @@ static const struct of_device_id platform_of_match[] = {
}, {
.compatible = "edt,et057090dhu",
.data = _et057090dhu,
+   }, {
+   .compatible = "edt,etmv570g2dhu",
+   .data = _etmv570g2dhu,
}, {
.compatible = "edt,et070080dh6",
.data = _etm0700g0dh6,
-- 
2.30.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] /msm/adreno: fix different address spaces warning

2021-03-31 Thread kernel test robot
Hi Bernard,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.12-rc5 next-20210331]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Bernard-Zhao/msm-adreno-fix-different-address-spaces-warning/20210331-212535
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: arm-defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/ba5ad7c05994836bcb59fd6d7b5b70c8b553ea56
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Bernard-Zhao/msm-adreno-fix-different-address-spaces-warning/20210331-212535
git checkout ba5ad7c05994836bcb59fd6d7b5b70c8b553ea56
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c: In function 'cx_debugbus_read':
>> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: error: invalid operands 
>> to binary << (have 'void *' and 'int')
 177 |  msm_writel((val), (ptr) + ((offset) << 2))
 |  ^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:189:2: note: in expansion of 
macro 'cxdbg_write'
 189 |  cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
 |  ^~~
>> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: error: invalid operands 
>> to binary << (have 'void *' and 'int')
 177 |  msm_writel((val), (ptr) + ((offset) << 2))
 |  ^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:190:2: note: in expansion of 
macro 'cxdbg_write'
 190 |  cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
 |  ^~~
>> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: error: invalid operands 
>> to binary << (have 'void *' and 'int')
 177 |  msm_writel((val), (ptr) + ((offset) << 2))
 |  ^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:191:2: note: in expansion of 
macro 'cxdbg_write'
 191 |  cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
 |  ^~~
>> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: error: invalid operands 
>> to binary << (have 'void *' and 'int')
 177 |  msm_writel((val), (ptr) + ((offset) << 2))
 |  ^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:192:2: note: in expansion of 
macro 'cxdbg_write'
 192 |  cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
 |  ^~~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:180:30: error: invalid operands 
to binary << (have 'void *' and 'int')
 180 |  msm_readl((ptr) + ((offset) << 2))
 |  ^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:197:12: note: in expansion of 
macro 'cxdbg_read'
 197 |  data[0] = cxdbg_read(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 |^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:180:30: error: invalid operands 
to binary << (have 'void *' and 'int')
 180 |  msm_readl((ptr) + ((offset) << 2))
 |  ^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:198:12: note: in expansion of 
macro 'cxdbg_read'
 198 |  data[1] = cxdbg_read(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 |^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c: In function 'a6xx_get_debugbus':
>> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: error: invalid operands 
>> to binary << (have 'void *' and 'int')
 177 |  msm_writel((val), (ptr) + ((offset) << 2))
 |  ^~
   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:356:3: note: in expansion of 
macro 'cxdbg_write'
 356 |   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
 |   ^~~
>> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:177:38: error: invalid operands 
>> to binary << (have 'void *' and 'int')
 177 |  msm_writ

Re: [Freedreno] [v1] drm/msm/disp/dpu1: icc path needs to be set before dpu runtime resume

2021-03-31 Thread Steev Klimaszewski

On 3/31/21 7:34 AM, kalya...@codeaurora.org wrote:
> On 2021-03-31 00:04, Steev Klimaszewski wrote:
>> On 3/22/21 4:17 AM, Kalyan Thota wrote:
>>> From: Kalyan Thota 
>>>
>>> DPU runtime resume will request for a min vote on the AXI bus as
>>> it is a necessary step before turning ON the AXI clock.
>>>
> Hi Steev,
>
> The WARN_ON is true only for the device with compatible
> "qcom,sc7180-mdss". For other devices its a
> false alarm. Can you please try with the below change ?
>
> https://patchwork.kernel.org/project/linux-arm-msm/patch/1617190020-7931-1-git-send-email-kalya...@codeaurora.org/
>
>
> Thanks,
> Kalyan
>
Hi Kalyan,

Tested here, and it does get rid of the warning.  I'll keep a copy of
the patch locally, since this is going to hit stable too at some point
it seems, at least until another version comes out addressing the other
comments from people way smarter than me.

-- steev

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] fix NULL pointer dereference crash

2021-03-31 Thread Greg KH
On Wed, Mar 31, 2021 at 07:34:29PM +0300, Hassan Shahbazi wrote:
> The patch has fixed a NULL pointer dereference crash in hiding the cursor. It 
> is verified by syzbot patch tester.
> 
> Reported by: syzbot
> https://syzkaller.appspot.com/bug?id=defb47bf56e1c14d5687280c7bb91ce7b608b94b
> 
> Signed-off-by: Hassan Shahbazi 
> ---
>  drivers/video/fbdev/core/fbcon.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/video/fbdev/core/fbcon.c 
> b/drivers/video/fbdev/core/fbcon.c
> index 44a5cd2f54cc..ee252d1c43c6 100644
> --- a/drivers/video/fbdev/core/fbcon.c
> +++ b/drivers/video/fbdev/core/fbcon.c
> @@ -1333,8 +1333,9 @@ static void fbcon_cursor(struct vc_data *vc, int mode)
>  
>   ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1;
>  
> - ops->cursor(vc, info, mode, get_color(vc, info, c, 1),
> - get_color(vc, info, c, 0));
> + if (ops && ops->cursor)

As ops obviously is not NULL here (you just used it on the line above),
why are you checking it again?

And what makes curser be NULL here?  How can that happen?

Also your subject line can use some work, please make it reflect the
driver subsystem you are looking at.

thanks,

greg k-h
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [Freedreno] [PATCH v4 17/24] drm/msm/dsi: make save_state/restore_state callbacks accept msm_dsi_phy

2021-03-31 Thread abhinavk

On 2021-03-31 03:57, Dmitry Baryshkov wrote:

Make save_state/restore callbacks accept struct msm_dsi_phy rather than
struct msm_dsi_pll. This moves them to struct msm_dsi_phy_ops, allowing
us to drop struct msm_dsi_pll_ops.

Signed-off-by: Dmitry Baryshkov 
Tested-by: Stephen Boyd  # on sc7180 lazor

Reviewed-by: Abhinav Kumar 

---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 12 +++
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 11 +++---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c| 24 ++---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c| 24 ++---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c| 34 ---
 .../gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c   | 18 +-
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 24 ++---
 7 files changed, 64 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index a1360e2dad3b..2c5ccead3baa 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -858,9 +858,9 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy 
*phy,


 void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy)
 {
-   if (phy->cfg->pll_ops.save_state) {
-   phy->cfg->pll_ops.save_state(phy->pll);
-   phy->pll->state_saved = true;
+   if (phy->cfg->ops.save_pll_state) {
+   phy->cfg->ops.save_pll_state(phy);
+   phy->state_saved = true;
}
 }

@@ -868,12 +868,12 @@ int msm_dsi_phy_pll_restore_state(struct 
msm_dsi_phy *phy)

 {
int ret;

-   if (phy->cfg->pll_ops.restore_state && phy->pll->state_saved) {
-   ret = phy->cfg->pll_ops.restore_state(phy->pll);
+   if (phy->cfg->ops.restore_pll_state && phy->state_saved) {
+   ret = phy->cfg->ops.restore_pll_state(phy);
if (ret)
return ret;

-   phy->pll->state_saved = false;
+   phy->state_saved = false;
}

return 0;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index b477d21804c8..0b51828c3146 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -17,7 +17,6 @@
 struct msm_dsi_pll {
struct clk_hw   clk_hw;
boolpll_on;
-   boolstate_saved;

const struct msm_dsi_phy_cfg *cfg;
 };
@@ -29,17 +28,13 @@ struct msm_dsi_phy_ops {
int (*enable)(struct msm_dsi_phy *phy, int src_pll_id,
struct msm_dsi_phy_clk_request *clk_req);
void (*disable)(struct msm_dsi_phy *phy);
-};
-
-struct msm_dsi_pll_ops {
-   void (*save_state)(struct msm_dsi_pll *pll);
-   int (*restore_state)(struct msm_dsi_pll *pll);
+   void (*save_pll_state)(struct msm_dsi_phy *phy);
+   int (*restore_pll_state)(struct msm_dsi_phy *phy);
 };

 struct msm_dsi_phy_cfg {
struct dsi_reg_config reg_cfg;
struct msm_dsi_phy_ops ops;
-   const struct msm_dsi_pll_ops pll_ops;

unsigned long   min_pll_rate;
unsigned long   max_pll_rate;
@@ -115,6 +110,8 @@ struct msm_dsi_phy {
struct msm_dsi_pll *pll;

struct clk_hw_onecell_data *provided_clocks;
+
+   bool state_saved;
 };

 /*
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
index 91ae0f8dbd88..fefff08f83fd 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
@@ -518,9 +518,9 @@ static const struct clk_ops 
clk_ops_dsi_pll_10nm_vco = {

  * PLL Callbacks
  */

-static void dsi_pll_10nm_save_state(struct msm_dsi_pll *pll)
+static void dsi_10nm_pll_save_state(struct msm_dsi_phy *phy)
 {
-   struct dsi_pll_10nm *pll_10nm = to_pll_10nm(pll);
+   struct dsi_pll_10nm *pll_10nm = to_pll_10nm(phy->pll);
struct pll_10nm_cached_state *cached = _10nm->cached_state;
void __iomem *phy_base = pll_10nm->phy_cmn_mmio;
u32 cmn_clk_cfg0, cmn_clk_cfg1;
@@ -541,9 +541,9 @@ static void dsi_pll_10nm_save_state(struct 
msm_dsi_pll *pll)

cached->pix_clk_div, cached->pll_mux);
 }

-static int dsi_pll_10nm_restore_state(struct msm_dsi_pll *pll)
+static int dsi_10nm_pll_restore_state(struct msm_dsi_phy *phy)
 {
-   struct dsi_pll_10nm *pll_10nm = to_pll_10nm(pll);
+   struct dsi_pll_10nm *pll_10nm = to_pll_10nm(phy->pll);
struct pll_10nm_cached_state *cached = _10nm->cached_state;
void __iomem *phy_base = pll_10nm->phy_cmn_mmio;
u32 val;
@@ -562,7 +562,9 @@ static int dsi_pll_10nm_restore_state(struct
msm_dsi_pll *pll)
val |= cached->pll_mux;
pll_write(phy_base + REG_DSI_10nm_PHY_CMN_CLK_CFG1, val);

-   ret = dsi_pll_10nm_vco_set_rate(>clk_hw,
pll_10nm->vco_current_rate, pll_10nm->vco_ref_clk_rate);
+   ret = dsi_pll_10nm_vco_set_rate(>pll->clk_hw,
+   

Re: [PATCH 1/4] drm/amd/display: Introduce FPU directory inside DC

2021-03-31 Thread kernel test robot
Hi Rodrigo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20210331]
[also build test WARNING on v5.12-rc5]
[cannot apply to linus/master v5.12-rc5 v5.12-rc4 v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
base:7a43c78d0573e00456b033e2b9a895b89464
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/c4d5d1d0a04f13014a22e6932ddf8487bb130d34
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
git checkout c4d5d1d0a04f13014a22e6932ddf8487bb130d34
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/../display/dc/fpu_operations/dcn2x.c:84:6: 
>> warning: no previous prototype for 
>> 'dcn20_populate_dml_writeback_from_context' [-Wmissing-prototypes]
  84 | void dcn20_populate_dml_writeback_from_context(struct dc *dc,
 |  ^


vim +/dcn20_populate_dml_writeback_from_context +84 
drivers/gpu/drm/amd/amdgpu/../display/dc/fpu_operations/dcn2x.c

83  
  > 84  void dcn20_populate_dml_writeback_from_context(struct dc *dc,
85  struct resource_context *res_ctx, display_e2e_pipe_params_st 
*pipes)
86  {
87  _dcn20_populate_dml_writeback_from_context(dc, res_ctx, pipes);
88  }
89  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] amd/amdgpu: code refactoring to clean code style a bit

2021-03-31 Thread Bernard Zhao
Fix checkpatch.pl warning:
Too many leading tabs - consider code refactoring
WARNING: Too many leading tabs - consider code refactoring
+   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {

WARNING: Too many leading tabs - consider code refactoring
+   if (vbios_voltage_id <= 
leakage_bin[j]) {

WARNING: Too many leading tabs - consider code refactoring
+   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {

WARNING: Too many leading tabs - consider code refactoring
+   if (vbios_voltage_id <= 
leakage_bin[j]) {

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 84 
 1 file changed, 35 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 86add0f4ea4d..9968ff8ddc9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1283,65 +1283,51 @@ int 
amdgpu_atombios_get_leakage_vddc_based_on_leakage_params(struct amdgpu_devic
profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
(adev->mode_info.atom_context->bios + data_offset);
 
-   switch (frev) {
-   case 1:
+   if ((frev != 2) || (crev != 1)) {
+   DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
return -EINVAL;
-   case 2:
-   switch (crev) {
-   case 1:
-   if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
-   return -EINVAL;
-   leakage_bin = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usLeakageBinArrayOffset));
-   vddc_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
-   vddc_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
-   vddci_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
-   vddci_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
-
-   if (profile->ucElbVDDC_Num > 0) {
-   for (i = 0; i < profile->ucElbVDDC_Num; i++) {
-   if (vddc_id_buf[i] == 
virtual_voltage_id) {
-   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {
-   if (vbios_voltage_id <= 
leakage_bin[j]) {
-   *vddc = 
vddc_buf[j * profile->ucElbVDDC_Num + i];
-   break;
-   }
-   }
+   }
+
+   if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
+   return -EINVAL;
+
+   leakage_bin = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usLeakageBinArrayOffset));
+   vddc_id_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
+   vddc_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
+   vddci_id_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset 
+
+le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
+   vddci_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
+
+   if (profile->ucElbVDDC_Num > 0) {
+   for (i = 0; i < profile->ucElbVDDC_Num; i++) {
+   if (vddc_id_buf[i] == virtual_voltage_id) {
+   for (j = 0; j < profile->ucLeakageBinNum; j++) {
+   if (vbios_voltage_id <= leakage_bin[j]) 
{
+   *vddc = vddc_buf[j * 
profile->ucElbVDDC_Num + i];
break;
}
}
+   break;
}
-  

[PATCH] /msm/adreno: fix different address spaces warning

2021-03-31 Thread Bernard Zhao
Fixes the following sparse warnings:
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:189:9:expected void [noderef] 
__iomem *addr
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:189:9:got void *
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:190:9: warning: incorrect type in 
argument 2 (different address spaces)
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:190:9:expected void [noderef] 
__iomem *addr
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:190:9:got void *
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:191:9: warning: incorrect type in 
argument 2 (different address spaces)
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:191:9:expected void [noderef] 
__iomem *addr
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:191:9:got void *
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:192:9: warning: incorrect type in 
argument 2 (different address spaces)
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:192:9:expected void [noderef] 
__iomem *addr
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:192:9:got void *
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:197:19: warning: incorrect type in 
argument 1 (different address spaces)
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:197:19:expected void const 
[noderef] __iomem *addr
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:197:19:got void *
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:198:19: warning: incorrect type in 
argument 1 (different address spaces)
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:198:19:expected void const 
[noderef] __iomem *addr
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:198:19:got void *
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:315:41: warning: incorrect type in 
argument 1 (different address spaces)
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:315:41:expected void *[noderef] 
__iomem cxdbg
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:315:41:got void [noderef] 
__iomem *cxdbg
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:189:9: warning: dereference of 
noderef expression
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:190:9: warning: dereference of 
noderef expression
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:191:9: warning: dereference of 
noderef expression
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:192:9: warning: dereference of 
noderef expression
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:197:19: warning: dereference of 
noderef expression
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:198:19: warning: dereference of 
noderef expression

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 36 ++---
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index c1699b4f9a89..e5558d09ddf9 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -186,16 +186,16 @@ static int cx_debugbus_read(void *__iomem cxdbg, u32 
block, u32 offset,
u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
 
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+   cxdbg_write(cxdbg, (void __iomem *)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, 
reg);
+   cxdbg_write(cxdbg, (void __iomem *)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, 
reg);
+   cxdbg_write(cxdbg, (void __iomem *)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, 
reg);
+   cxdbg_write(cxdbg, (void __iomem *)REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, 
reg);
 
/* Wait 1 us to make sure the data is flowing */
udelay(1);
 
-   data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
-   data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
+   data[0] = cxdbg_read(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
+   data[1] = cxdbg_read(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 
return 2;
 }
@@ -353,26 +353,26 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg = ioremap(res->start, resource_size(res));
 
if (cxdbg) {
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
+   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
+   cxdbg_write(cxdbg, (void __iomem 
*)REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
-   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
-   

[PATCH] fix NULL pointer dereference crash

2021-03-31 Thread Hassan Shahbazi
The patch fixes a NULL pointer dereference crash when hiding the cursor. It 
has been verified by the syzbot patch tester.

Reported-by: syzbot
https://syzkaller.appspot.com/bug?id=defb47bf56e1c14d5687280c7bb91ce7b608b94b

Signed-off-by: Hassan Shahbazi 
---
 drivers/video/fbdev/core/fbcon.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 44a5cd2f54cc..ee252d1c43c6 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -1333,8 +1333,9 @@ static void fbcon_cursor(struct vc_data *vc, int mode)
 
ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1;
 
-   ops->cursor(vc, info, mode, get_color(vc, info, c, 1),
-   get_color(vc, info, c, 0));
+   if (ops && ops->cursor)
+   ops->cursor(vc, info, mode, get_color(vc, info, c, 1),
+   get_color(vc, info, c, 0));
 }
 
 static int scrollback_phys_max = 0;
-- 
2.26.3

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] drm/ttm: cleanup coding style a bit

2021-03-31 Thread Bernard Zhao
Fix sparse warning:
drivers/gpu/drm/ttm/ttm_bo.c:52:1: warning: symbol 'ttm_global_mutex' was not 
declared. Should it be static?
drivers/gpu/drm/ttm/ttm_bo.c:53:10: warning: symbol 'ttm_bo_glob_use_count' was 
not declared. Should it be static?

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 101a68dc615b..eab21643edfb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -49,8 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj);
 /*
  * ttm_global_mutex - protecting the global BO state
  */
-DEFINE_MUTEX(ttm_global_mutex);
-unsigned ttm_bo_glob_use_count;
+static DEFINE_MUTEX(ttm_global_mutex);
+static unsigned int ttm_bo_glob_use_count;
 struct ttm_bo_global ttm_bo_glob;
 EXPORT_SYMBOL(ttm_bo_glob);
 
-- 
2.31.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] msm/disp: dpu_plane cleanup-coding-style-a-bit

2021-03-31 Thread Bernard Zhao
Fix sparse warning:
drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1195:41: warning: Using plain integer 
as NULL pointer

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index f898a8f67b7f..687a57850405 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -1223,7 +1223,7 @@ static void _dpu_plane_atomic_disable(struct drm_plane 
*plane)
 {
struct dpu_plane *pdpu = to_dpu_plane(plane);
struct drm_plane_state *state = plane->state;
-   struct dpu_plane_state *pstate = to_dpu_plane_state(state);
+   struct dpu_plane_state *pstate = (struct dpu_plane_state 
*)to_dpu_plane_state(state);
 
trace_dpu_plane_disable(DRMID(plane), is_dpu_plane_virtual(plane),
pstate->multirect_mode);
-- 
2.31.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] drm/amd: use kmalloc_array over kmalloc with multiply

2021-03-31 Thread Bernard Zhao
Fix patch check warning:
WARNING: Prefer kmalloc_array over kmalloc with multiply
+   buf = kmalloc(MAX_KFIFO_SIZE * sizeof(*buf), GFP_KERNEL);

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 17d1736367ea..246522423559 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -81,7 +81,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char 
__user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
 
-   buf = kmalloc(MAX_KFIFO_SIZE * sizeof(*buf), GFP_KERNEL);
+   buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
if (!buf)
return -ENOMEM;
 
-- 
2.31.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] drm/amd: cleanup coding style a bit

2021-03-31 Thread Bernard Zhao
Fix patch check warning:
WARNING: suspect code indent for conditional statements (8, 17)
+   if (obj && obj->use < 0) {
+DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", 
obj->head.name);

WARNING: braces {} are not necessary for single statement blocks
+   if (obj && obj->use < 0) {
+DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", 
obj->head.name);
+   }

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1fb2a91ad30a..43d17b72c265 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -449,11 +449,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 
 static inline void put_obj(struct ras_manager *obj)
 {
-   if (obj && --obj->use == 0)
+   if (obj && (--obj->use == 0))
list_del(>node);
-   if (obj && obj->use < 0) {
-DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", 
obj->head.name);
-   }
+   if (obj && (obj->use < 0))
+   DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name);
 }
 
 /* make one obj and return it. */
-- 
2.31.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] drm/nouveau: cleanup-coding-style-a-bit

2021-03-31 Thread Bernard Zhao
This change is to make the code a bit more readable.

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c | 44 ++---
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c 
b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
index 57df997c5ff3..053e86845d63 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
@@ -41,9 +41,9 @@ static int
 wndwc37e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 13);
 
-   if ((ret = PUSH_WAIT(push, 13)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_CSC_RED2RED, asyw->csc.matrix, 12);
@@ -54,9 +54,9 @@ static int
 wndwc37e_ilut_clr(struct nv50_wndw *wndw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 2);
 
-   if ((ret = PUSH_WAIT(push, 2)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_CONTEXT_DMA_INPUT_LUT, 0x);
@@ -67,9 +67,9 @@ static int
 wndwc37e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 4);
 
-   if ((ret = PUSH_WAIT(push, 4)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_CONTROL_INPUT_LUT,
@@ -100,9 +100,9 @@ int
 wndwc37e_blend_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 8);
 
-   if ((ret = PUSH_WAIT(push, 8)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_COMPOSITION_CONTROL,
@@ -145,9 +145,9 @@ int
 wndwc37e_image_clr(struct nv50_wndw *wndw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 4);
 
-   if ((ret = PUSH_WAIT(push, 4)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_PRESENT_CONTROL,
@@ -162,9 +162,9 @@ static int
 wndwc37e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 17);
 
-   if ((ret = PUSH_WAIT(push, 17)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_PRESENT_CONTROL,
@@ -215,9 +215,9 @@ int
 wndwc37e_ntfy_clr(struct nv50_wndw *wndw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 2);
 
-   if ((ret = PUSH_WAIT(push, 2)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_CONTEXT_DMA_NOTIFIER, 0x);
@@ -228,9 +228,9 @@ int
 wndwc37e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 3);
 
-   if ((ret = PUSH_WAIT(push, 3)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_CONTEXT_DMA_NOTIFIER, asyw->ntfy.handle,
@@ -245,9 +245,9 @@ int
 wndwc37e_sema_clr(struct nv50_wndw *wndw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 2);
 
-   if ((ret = PUSH_WAIT(push, 2)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_CONTEXT_DMA_SEMAPHORE, 0x);
@@ -258,9 +258,9 @@ int
 wndwc37e_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 5);
 
-   if ((ret = PUSH_WAIT(push, 5)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_SEMAPHORE_CONTROL, asyw->sema.offset,
@@ -274,9 +274,9 @@ int
 wndwc37e_update(struct nv50_wndw *wndw, u32 *interlock)
 {
struct nvif_push *push = wndw->wndw.push;
-   int ret;
+   int ret = PUSH_WAIT(push, 5);
 
-   if ((ret = PUSH_WAIT(push, 5)))
+   if (ret)
return ret;
 
PUSH_MTHD(push, NVC37E, SET_INTERLOCK_FLAGS, 
interlock[NV50_DISP_INTERLOCK_CURS] << 1 |
-- 
2.31.0

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v5 1/2] dt-bindings: usb: add analogix,anx7688.yaml

2021-03-31 Thread Dafna Hirschfeld

Hi,

On 05.03.21 18:24, Ondřej Jirman wrote:

Hello Dafna,

On Fri, Mar 05, 2021 at 04:14:03PM +0100, Dafna Hirschfeld wrote:

Hi

On 05.03.21 15:34, Laurent Pinchart wrote:

Hi Dafna,

Thank you for the patch.

On Fri, Mar 05, 2021 at 01:43:50PM +0100, Dafna Hirschfeld wrote:

ANX7688 is a USB Type-C port controller with a MUX. It converts HDMI 2.0 to
DisplayPort 1.3 Ultra-HD (4096x2160p60).
The integrated crosspoint switch (the MUX) supports USB 3.1 data transfer
along with the DisplayPort Alternate Mode signaling over USB Type-C.
Additionally, an on-chip microcontroller (OCM) is available to manage the
signal switching, Channel Configuration (CC) detection, USB Power
Delivery (USB-PD), Vendor Defined Message (VDM) protocol support and other
functions as defined in the USB TypeC and USB Power Delivery
specifications.

ANX7688 is found on Acer Chromebook R13 (elm) and on
Pine64 PinePhone.


Thanks for your work on the bindings. :) It would be great to find something
acceptable for mainlining.

A few comments based on my experience implementing the USB-PD part for PinePhone
are bellow.


+properties:
+  compatible:
+const: analogix,anx7688
+
+  reg:
+maxItems: 1
+
+  avdd33-supply:
+description: 3.3V Analog core supply voltage.
+
+  dvdd18-supply:
+description: 1.8V Digital I/O supply voltage.
+
+  avdd18-supply:
+description: 1.8V Analog core power supply voltage.
+
+  avdd10-supply:
+description: 1.0V Analog core power supply voltage.
+
+  dvdd10-supply:
+description: 1.0V Digital core supply voltage.
+


That's lots of supplies. If there's a reasonable chance that some of
them will always be driven by the same regulator (especially if the
ANX7688 documentation requires that), then they could be grouped. For
instance dvdd18-supply and avdd18-supply could be grouped into
vdd18-supply. It would still allow us to extend the bindings in a
backward compatible way later if a system uses different regulators. You
have more information about the hardware than I do, so it's your call.


On PinePhone, AVDD and DVDD for 1.0V are just separately filtered outputs
from the same regulator. So it would work there to just use vdd18 and
vdd10. The same is true for reference design, so it's probably safe
to assume this can be simplified.


+  hdmi5v-supply:
+description: 5V power supply for the HDMI.
+
+  hdmi_vt-supply:
+description: Termination voltage for HDMI input.


Maybe hdmi-vt-supply ?


+
+  clocks:
+description: The input clock specifier.
+maxItems: 1


How about

  items:
- description: The input clock specifier.


+
+  clock-names:
+items:
+  - const: xtal
+
+  hpd-gpios:
+description: |
+  In USB Type-C applications, DP_HPD has no use. In standard DisplayPort
+  applications, DP_HPD is used as DP hot-plug.
+maxItems: 1


On PinePhone this is wired to a HDMI port on the SoC, and HPD is handled by the
sun4i HDMI DRM driver. Seems like HPD will be handled by HDMI controller on
many/all? other platforms too. Why have it here?


Right, I didn't have the full picture when listing all the pins of the anx7688.
I was not sure what should be listed.
I will remove that.




+  enable-gpios:
+description: Chip power down control. No internal pull-down or pull-up 
resistor.
+maxItems: 1
+
+  reset-gpios:
+description: Reset input signal. Active low.
+maxItems: 1
+
+  vbus-det-gpios:
+description: |
+  An input gpio for VBUS detection and high voltage detection,
+  external resistance divide VBUS voltage to 1/8.
+maxItems: 1


Why have this in the bindings? It seems that this is handled internally by the
ANX7688 via OCM firmware. And it's not really gpio either, it's an analog input
with AD converter hooked to it internally.


I will remove that.




+  interrupts:
+description: |
+  The interrupt notifies 4 possible events - TCPC ALERT int, PD int, DP 
int, HDMI int.
+maxItems: 1
+
+  cabledet-gpios:
+description: An output gpio, indicates by the device that a cable is 
plugged.
+maxItems: 1
+
+  vbus-ctrl-gpios:
+description:
+  External VBUS power path. Enable VBUS source and disable VBUS sink or 
vice versa.
+maxItems: 1


VBUS control seems to be already modelled by the usb-connector bindings. Why
have this here?


dito



+  vconn-en1-gpios:
+description: Controls the VCONN switch on the CC1 pin.
+maxItems: 1
+
+  vconn-en2-gpios:
+description: Controls the VCONN switch on the CC2 pin.
+maxItems: 1


VCONN is a voltage regulator that can be enabled either on CC1 or CC2
pin, or disabled completely. This control is *partially* performed in reference
design directly by the OCM. OCM only decides which CC pin to switch
the VCONN regulator to, and informs the SoC when the base VCONN regulator
for the switches needs to be enabled.

So vconn-en1/2 gpios are irrelevant to the driver, but the driver needs
to control VCONN power supply somehow and defer control over 

Re: [PATCH v2 05/14] drm/bridge: ti-sn65dsi86: Move MIPI detach() / unregister() to detach()

2021-03-31 Thread Doug Anderson
Hi,

On Wed, Mar 31, 2021 at 2:53 AM Andrzej Hajda  wrote:
>
>
> W dniu 30.03.2021 o 04:53, Douglas Anderson pisze:
> > The register() / attach() for MIPI happen in the bridge's
> > attach(). That means that the inverse belongs in the bridge's
> > detach().
>
>
> As I commented in previous patch, it would be better to fix mipi/bridge
> registration order in host and this driver.
>
> Have you considered this?

Fair enough. How about I drop this patch at the moment? My series
already has enough stuff in it right now and I don't believe anything
in the series depends on this patch.

-Doug
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v2 0/8] drm/edid: overhaul displayid iterator

2021-03-31 Thread Jani Nikula
On Mon, 29 Mar 2021, Jani Nikula  wrote:
> v2 of [1], addressing Ville's review comments, and adding a couple of
> extra patches on top.
>
> BR,
> Jani.
>
> [1] https://patchwork.freedesktop.org/series/87802/

Pushed to drm-misc-next, with Ville's review and Maxime's IRC nod of
approval. Thanks. :)

BR,
Jani.

>
>
> Jani Nikula (8):
>   drm/edid: make a number of functions, parameters and variables const
>   drm/displayid: add separate drm_displayid.c
>   drm/displayid: add new displayid section/block iterators
>   drm/edid: use the new displayid iterator for detailed modes
>   drm/edid: use the new displayid iterator for finding CEA extension
>   drm/edid: use the new displayid iterator for tile info
>   drm/displayid: allow data blocks with 0 payload length
>   drm/displayid: rename displayid_hdr to displayid_header
>
>  drivers/gpu/drm/Makefile|   2 +-
>  drivers/gpu/drm/drm_displayid.c | 132 
>  drivers/gpu/drm/drm_edid.c  | 171 +++-
>  include/drm/drm_displayid.h |  30 --
>  include/drm/drm_edid.h  |   3 +
>  5 files changed, 196 insertions(+), 142 deletions(-)
>  create mode 100644 drivers/gpu/drm/drm_displayid.c

-- 
Jani Nikula, Intel Open Source Graphics Center
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 16/18] iommu: remove DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE

2021-03-31 Thread Robin Murphy

On 2021-03-16 15:38, Christoph Hellwig wrote:
[...]

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index f1e38526d5bd40..996dfdf9d375dd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2017,7 +2017,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain 
*domain,
.iommu_dev  = smmu->dev,
};
  
-	if (smmu_domain->non_strict)

+   if (!iommu_get_dma_strict())


As Will raised, this also needs to be checking "domain->type == 
IOMMU_DOMAIN_DMA" to maintain equivalent behaviour to the attribute code 
below.



pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
  
  	pgtbl_ops = alloc_io_pgtable_ops(fmt, _cfg, smmu_domain);

@@ -2449,52 +2449,6 @@ static struct iommu_group *arm_smmu_device_group(struct 
device *dev)
return group;
  }
  
-static int arm_smmu_domain_get_attr(struct iommu_domain *domain,

-   enum iommu_attr attr, void *data)
-{
-   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-
-   switch (domain->type) {
-   case IOMMU_DOMAIN_DMA:
-   switch (attr) {
-   case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-   *(int *)data = smmu_domain->non_strict;
-   return 0;
-   default:
-   return -ENODEV;
-   }
-   break;
-   default:
-   return -EINVAL;
-   }
-}

[...]

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index f985817c967a25..edb1de479dd1a7 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -668,7 +668,6 @@ struct arm_smmu_domain {
struct mutexinit_mutex; /* Protects smmu pointer */
  
  	struct io_pgtable_ops		*pgtbl_ops;

-   boolnon_strict;
atomic_tnr_ats_masters;
  
  	enum arm_smmu_domain_stage	stage;

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 0aa6d667274970..3dde22b1f8ffb0 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -761,6 +761,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
.iommu_dev  = smmu->dev,
};
  
+	if (!iommu_get_dma_strict())


Ditto here.

Sorry for not spotting that sooner :(

Robin.


+   pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
if (smmu->impl && smmu->impl->init_context) {
ret = smmu->impl->init_context(smmu_domain, _cfg, dev);
if (ret)

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 16/18] iommu: remove DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE

2021-03-31 Thread Robin Murphy

On 2021-03-31 16:32, Will Deacon wrote:

On Wed, Mar 31, 2021 at 02:09:37PM +0100, Robin Murphy wrote:

On 2021-03-31 12:49, Will Deacon wrote:

On Tue, Mar 30, 2021 at 05:28:19PM +0100, Robin Murphy wrote:

On 2021-03-30 14:58, Will Deacon wrote:

On Tue, Mar 30, 2021 at 02:19:38PM +0100, Robin Murphy wrote:

On 2021-03-30 14:11, Will Deacon wrote:

On Tue, Mar 16, 2021 at 04:38:22PM +0100, Christoph Hellwig wrote:

From: Robin Murphy 

Instead make the global iommu_dma_strict parameter in iommu.c canonical by
exporting helpers to get and set it and use those directly in the drivers.

This makes sure that the iommu.strict parameter also works for the AMD and
Intel IOMMU drivers on x86.  As those default to lazy flushing, a new
IOMMU_CMD_LINE_STRICT is used to turn the value into a tristate to
represent the default if not overridden by an explicit parameter.

Signed-off-by: Robin Murphy .
[ported on top of the other iommu_attr changes and added a few small
 missing bits]
Signed-off-by: Christoph Hellwig 
---
 drivers/iommu/amd/iommu.c   | 23 +---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 50 +---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  1 -
 drivers/iommu/arm/arm-smmu/arm-smmu.c   | 27 +
 drivers/iommu/dma-iommu.c   |  9 +--
 drivers/iommu/intel/iommu.c | 64 -
 drivers/iommu/iommu.c   | 27 ++---
 include/linux/iommu.h   |  4 +-
 8 files changed, 40 insertions(+), 165 deletions(-)


I really like this cleanup, but I can't help wonder if it's going in the
wrong direction. With SoCs often having multiple IOMMU instances and a
distinction between "trusted" and "untrusted" devices, then having the
flush-queue enabled on a per-IOMMU or per-domain basis doesn't sound
unreasonable to me, but this change makes it a global property.


The intent here was just to streamline the existing behaviour of stuffing a
global property into a domain attribute then pulling it out again in the
illusion that it was in any way per-domain. We're still checking
dev_is_untrusted() before making an actual decision, and it's not like we
can't add more factors at that point if we want to.


Like I say, the cleanup is great. I'm just wondering whether there's a
better way to express the complicated logic to decide whether or not to use
the flush queue than what we end up with:

if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
domain->ops->flush_iotlb_all && !iommu_get_dma_strict())

which is mixing up globals, device properties and domain properties. The
result is that the driver code ends up just using the global to determine
whether or not to pass IO_PGTABLE_QUIRK_NON_STRICT to the page-table code,
which is a departure from the current way of doing things.


But previously, SMMU only ever saw the global policy piped through the
domain attribute by iommu_group_alloc_default_domain(), so there's no
functional change there.


For DMA domains sure, but I don't think that's the case for unmanaged
domains such as those used by VFIO.


Eh? This is only relevant to DMA domains anyway. Flush queues are part of
the IOVA allocator that VFIO doesn't even use. It's always been the case
that unmanaged domains only use strict invalidation.


Maybe I'm going mad. With this patch, the SMMU driver unconditionally sets
IO_PGTABLE_QUIRK_NON_STRICT for page-tables if iommu_get_dma_strict() is
true, no? In which case, that will get set for page-tables corresponding
to unmanaged domains as well as DMA domains when it is enabled. That didn't
happen before because you couldn't set the attribute for unmanaged domains.

What am I missing?


Oh cock... sorry, all this time I've been saying what I *expect* it to 
do, while overlooking the fact that the IO_PGTABLE_QUIRK_NON_STRICT 
hunks were the bits I forgot to write and Christoph had to fix up. 
Indeed, those should be checking the domain type too to preserve the 
existing behaviour. Apologies for the confusion.


Robin.


Obviously some of the above checks could be factored out into some kind of
iommu_use_flush_queue() helper that IOMMU drivers can also call if they need
to keep in sync. Or maybe we just allow iommu-dma to set
IO_PGTABLE_QUIRK_NON_STRICT directly via iommu_set_pgtable_quirks() if we're
treating that as a generic thing now.


I think a helper that takes a domain would be a good starting point.


You mean device, right? The one condition we currently have is at the device
level, and there's really nothing inherent to the domain itself that matters
(since the type is implicitly IOMMU_DOMAIN_DMA to even care about this).


Device would probably work too; you'd pass the first device to attach to the
domain when querying this from the SMMU driver, I suppose.

Will


___
dri-devel mailing list
dri-devel@lists.freedesktop.org

Re: [v1] drm/msm/disp/dpu1: fix warn stack reported during dpu resume

2021-03-31 Thread Dmitry Baryshkov

On 31/03/2021 14:27, Kalyan Thota wrote:

WARN_ON was introduced by the below commit to catch runtime resumes
that are getting triggered before icc path was set.

"drm/msm/disp/dpu1: icc path needs to be set before dpu runtime resume"

For the targets where the bw scaling is not enabled, this WARN_ON is
a false alarm. Fix the WARN condition appropriately.


Should we change all DPU targets to use bw scaling to the mdp from the 
mdss nodes? The limitation to sc7180 looks artificial.




Reported-by: Steev Klimaszewski 
Signed-off-by: Kalyan Thota 
---
  drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  |  8 +---
  drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h  |  9 +
  drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 11 ++-
  3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index cab387f..0071a4d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -294,6 +294,9 @@ static int dpu_kms_parse_data_bus_icc_path(struct dpu_kms 
*dpu_kms)
struct icc_path *path1;
struct drm_device *dev = dpu_kms->dev;
  
+	if (!dpu_supports_bw_scaling(dev))

+   return 0;
+
path0 = of_icc_get(dev->dev, "mdp0-mem");
path1 = of_icc_get(dev->dev, "mdp1-mem");
  
@@ -934,8 +937,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)

DPU_DEBUG("REG_DMA is not defined");
}
  
-	if (of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss"))

-   dpu_kms_parse_data_bus_icc_path(dpu_kms);
+   dpu_kms_parse_data_bus_icc_path(dpu_kms);
  
  	pm_runtime_get_sync(_kms->pdev->dev);
  
@@ -1198,7 +1200,7 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev)
  
  	ddev = dpu_kms->dev;
  
-	WARN_ON(!(dpu_kms->num_paths));

+   WARN_ON((dpu_supports_bw_scaling(ddev) && !dpu_kms->num_paths));
/* Min vote of BW is required before turning on AXI clk */
for (i = 0; i < dpu_kms->num_paths; i++)
icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW));
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index d6717d6..f7bcc0a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -154,6 +154,15 @@ struct vsync_info {
  
  #define to_dpu_global_state(x) container_of(x, struct dpu_global_state, base)
  
+/**

+ * dpu_supports_bw_scaling: returns true for drivers that support bw scaling.
+ * @dev: Pointer to drm_device structure
+ */
+static inline int dpu_supports_bw_scaling(struct drm_device *dev)
+{
+   return of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss");
+}
+
  /* Global private object state for tracking resources that are shared across
   * multiple kms objects (planes/crtcs/etc).
   */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
index cd40788..8cd712c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
@@ -41,6 +41,9 @@ static int dpu_mdss_parse_data_bus_icc_path(struct drm_device 
*dev,
struct icc_path *path0 = of_icc_get(dev->dev, "mdp0-mem");
struct icc_path *path1 = of_icc_get(dev->dev, "mdp1-mem");
  
+	if (dpu_supports_bw_scaling(dev))

+   return 0;
+
if (IS_ERR_OR_NULL(path0))
return PTR_ERR_OR_ZERO(path0);
  
@@ -276,11 +279,9 @@ int dpu_mdss_init(struct drm_device *dev)
  
  	DRM_DEBUG("mapped mdss address space @%pK\n", dpu_mdss->mmio);
  
-	if (!of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss")) {

-   ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
-   if (ret)
-   return ret;
-   }
+   ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
+   if (ret)
+   return ret;
  
  	mp = _mdss->mp;

ret = msm_dss_parse_clock(pdev, mp);




--
With best wishes
Dmitry
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


  1   2   3   >