[PATCH 33/34] drm/amdkfd: Add SVM API support capability bits

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

SVMAPISupported property added to HSA_CAPABILITY, the value matches
HSA_CAPABILITY defined in the Thunk spec:

SVMAPISupported: it will not be supported on older kernels that don't
have HMM or on systems with GFXv8 or older GPUs without support for
48-bit virtual addresses.

CoherentHostAccess property added to HSA_MEMORYPROPERTY, the value matches
HSA_MEMORYPROPERTY defined in the Thunk spec:

CoherentHostAccess: whether or not device memory can be coherently
accessed by the host CPU.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 10 ++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index cdef608db4f4..ab9fe854b4d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1419,6 +1419,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.capability |= (adev->ras_features != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
 
+   /* SVM API and HMM page migration work together, device memory type
+* is initalized to not 0 when page migration register device memory.
+*/
+   if (adev->kfd.dev->pgmap.type != 0)
+   dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
+
kfd_debug_print_topology();
 
if (!res)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index b8b68087bd7a..6bd6380b0ee0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -53,8 +53,9 @@
 #define HSA_CAP_ASIC_REVISION_MASK 0x03c0
 #define HSA_CAP_ASIC_REVISION_SHIFT22
 #define HSA_CAP_SRAM_EDCSUPPORTED  0x0400
+#define HSA_CAP_SVMAPI_SUPPORTED   0x0800
 
-#define HSA_CAP_RESERVED   0xf80f8000
+#define HSA_CAP_RESERVED   0xf00f8000
 
 struct kfd_node_properties {
uint64_t hive_id;
@@ -98,9 +99,10 @@ struct kfd_node_properties {
 #define HSA_MEM_HEAP_TYPE_GPU_LDS  4
 #define HSA_MEM_HEAP_TYPE_GPU_SCRATCH  5
 
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE0x0001
-#define HSA_MEM_FLAGS_NON_VOLATILE 0x0002
-#define HSA_MEM_FLAGS_RESERVED 0xfffc
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE0x0001
+#define HSA_MEM_FLAGS_NON_VOLATILE 0x0002
+#define HSA_MEM_FLAGS_COHERENTHOSTACCESS   0x0004
+#define HSA_MEM_FLAGS_RESERVED 0xfff8
 
 struct kfd_mem_properties {
struct list_headlist;
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 34/34] drm/amdkfd: Add CONFIG_HSA_AMD_SVM

2021-03-31 Thread Felix Kuehling
Control whether to build SVM support into amdgpu with a Kconfig option.
This makes it easier to disable it in production kernels if this new
feature causes problems in production environments.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/Kconfig   | 15 +++-
 drivers/gpu/drm/amd/amdkfd/Makefile  |  9 ---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  7 ++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 17 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 30 
 5 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig 
b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 7880fc101a3b..d03a79e14126 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -8,8 +8,21 @@ config HSA_AMD
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select HMM_MIRROR
-   select DEVICE_PRIVATE
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
help
  Enable this if you want to use HSA features on AMD GPU devices.
+
+config HSA_AMD_SVM
+   bool "Enable HMM-based shared virtual memory manager"
+   depends on HSA_AMD
+   default y
+   select HMM_MIRROR
+   select MMU_NOTIFIER
+   select DEVICE_PRIVATE
+   help
+ Enable this to use unified memory and managed memory in HIP. This
+ memory manager supports two modes of operation. One based on
+ preemptions and one based on page faults. To enable page fault
+ based memory management on most GFXv9 GPUs, set the module
+ parameter amdgpu.noretry=0.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index a93301dbc464..c4f3aff11072 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -54,9 +54,7 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgdev.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
-   $(AMDKFD_PATH)/kfd_crat.o \
-   $(AMDKFD_PATH)/kfd_svm.o \
-   $(AMDKFD_PATH)/kfd_migrate.o
+   $(AMDKFD_PATH)/kfd_crat.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
@@ -65,3 +63,8 @@ endif
 ifneq ($(CONFIG_DEBUG_FS),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
 endif
+
+ifneq ($(CONFIG_HSA_AMD_SVM),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
+   $(AMDKFD_PATH)/kfd_migrate.o
+endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9838d0cd1f51..f60c44dbae3e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1768,6 +1768,7 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep,
return r;
 }
 
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
struct kfd_ioctl_svm_args *args = data;
@@ -1793,6 +1794,12 @@ static int kfd_ioctl_svm(struct file *filep, struct 
kfd_process *p, void *data)
 
return r;
 }
+#else
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+   return -EPERM;
+}
+#endif
 
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index bc680619d135..9119b75b3853 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -24,6 +24,8 @@
 #ifndef KFD_MIGRATE_H_
 #define KFD_MIGRATE_H_
 
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
 #include 
 #include 
 #include 
@@ -43,17 +45,20 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
 
-#if defined(CONFIG_DEVICE_PRIVATE)
 int svm_migrate_init(struct amdgpu_device *adev);
 void svm_migrate_fini(struct amdgpu_device *adev);
 
 #else
+
 static inline int svm_migrate_init(struct amdgpu_device *adev)
 {
-   DRM_WARN_ONCE("DEVICE_PRIVATE kernel config option is not enabled, "
- "add CONFIG_DEVICE_PRIVATE=y in config file to fix\n");
-   return -ENODEV;
+   return 0;
+}
+static inline void svm_migrate_fini(struct amdgpu_device *adev)
+{
+   /* empty */
 }
-static inline void svm_migrate_fini(struct amdgpu_device *adev) {}
-#endif
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
 #endif /* KFD_MIGRATE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index af853726b861..363c282f8747 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -24,6 +24,8 @@
 #ifndef KFD_SVM_H_
 #define KFD_SVM_H_
 
+#if 

[PATCH 32/34] drm/amdkfd: multiple gpu migrate vram to vram

2021-03-31 Thread Felix Kuehling
If prefetching a range to a GPU while its actual location is another GPU,
or if a GPU retry fault restores pages for a range whose actual location
is another GPU, then migrate the range from one GPU to the other.

Use system memory as a bridge because the sdma engine may not be able to
access another GPU's vram: use the sdma of the source GPU to migrate to
system memory, then use the sdma of the destination GPU to migrate from
system memory to that GPU.

Print out gpuid or gpuidx in debug messages.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 47 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 51 +++-
 3 files changed, 87 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 73701406acb3..d111f88897db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -486,8 +486,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
  * Return:
  * 0 - OK, otherwise error code
  */
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
-   struct mm_struct *mm)
+static int
+svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
 {
unsigned long addr, start, end;
struct vm_area_struct *vma;
@@ -741,6 +742,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm)
return r;
 }
 
+/**
+ * svm_migrate_vram_to_vram - migrate svm range from device to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: process mm, use current->mm if NULL
+ *
+ * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
+struct mm_struct *mm)
+{
+   int r;
+
+   /*
+* TODO: for both devices with PCIe large bar or on same xgmi hive, skip
+* system memory as migration bridge
+*/
+
+   pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
+
+   r = svm_migrate_vram_to_ram(prange, mm);
+   if (r)
+   return r;
+
+   return svm_migrate_ram_to_vram(prange, best_loc, mm);
+}
+
+int
+svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
+{
+   if  (!prange->actual_loc)
+   return svm_migrate_ram_to_vram(prange, best_loc, mm);
+   else
+   return svm_migrate_vram_to_vram(prange, best_loc, mm);
+
+}
+
 /**
  * svm_migrate_to_ram - CPU page fault handler
  * @vmf: CPU vm fault vma, address
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 9949b55d3b6a..bc680619d135 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -37,8 +37,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM
 };
 
-int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc,
-   struct mm_struct *mm);
+int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+   struct mm_struct *mm);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index adb79b10f874..dc0f523ce321 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -347,8 +347,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
kref_put(_bo->kref, svm_range_bo_release);
 }
 
-static bool svm_range_validate_svm_bo(struct svm_range *prange)
+static bool
+svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
 {
+   struct amdgpu_device *bo_adev;
+
mutex_lock(>lock);
if (!prange->svm_bo) {
mutex_unlock(>lock);
@@ -360,6 +363,22 @@ static bool svm_range_validate_svm_bo(struct svm_range 
*prange)
return true;
}
if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+   /*
+* Migrate from GPU to GPU, remove range from source bo_adev
+* svm_bo range list, and return false to allocate svm_bo from
+* destination adev.
+*/
+   bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+   if (bo_adev != adev) {
+   mutex_unlock(>lock);
+
+   spin_lock(>svm_bo->list_lock);
+   list_del_init(>svm_bo_list);
+   spin_unlock(>svm_bo->list_lock);
+
+   

[PATCH 30/34] drm/amdkfd: refine migration policy with xnack on

2021-03-31 Thread Felix Kuehling
With xnack on, the GPU vm fault handler decides the best restore location,
then migrates the range to the best restore location and updates the GPU
mapping to recover from the GPU vm fault.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |   7 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  16 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 136 +--
 5 files changed, 150 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 552c4f656e2d..73701406acb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -479,18 +479,19 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @mm: the process mm structure
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *
  * Return:
  * 0 - OK, otherwise error code
  */
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc)
+int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
 {
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
-   struct mm_struct *mm;
int r = 0;
 
if (prange->actual_loc == best_loc) {
@@ -514,8 +515,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc)
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
 
-   mm = current->mm;
-
for (addr = start; addr < end;) {
unsigned long next;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 95fd7b21791f..9949b55d3b6a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -37,7 +37,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM
 };
 
-int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc);
+int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc,
+   struct mm_struct *mm);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ca1b0c518d46..bce44164f1e3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -864,6 +864,9 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
 
 int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+  struct amdgpu_device *adev, uint32_t *gpuid,
+  uint32_t *gpuidx);
 static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
uint32_t gpuidx, uint32_t *gpuid) {
return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index f897c1d0ea66..1d6310f63ae9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1673,6 +1673,22 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, 
uint32_t gpu_id)
return -EINVAL;
 }
 
+int
+kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+  uint32_t *gpuid, uint32_t *gpuidx)
+{
+   struct kgd_dev *kgd = (struct kgd_dev *)adev;
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++)
+   if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
+   *gpuid = p->pdds[i]->dev->id;
+   *gpuidx = i;
+   return 0;
+   }
+   return -EINVAL;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
int ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 77da6c68fab2..61bf2df38e72 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1283,6 +1283,24 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
if (gpuidx < MAX_GPU_INSTANCE) {
bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
bitmap_set(ctx.bitmap, gpuidx, 1);
+   } else if (ctx.process->xnack_enabled) {
+   bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+
+   

[PATCH 31/34] drm/amdkfd: add svm range validate timestamp

2021-03-31 Thread Felix Kuehling
With xnack on, add validate timestamp in order to handle GPU vm fault
from multiple GPUs.

If a GPU retry fault needs to migrate the range to the best restore
location, use the range validate timestamp to record the system timestamp
after the range is restored and the GPU page table is updated.

Because multiple pages of the same range can generate multiple retry
faults, define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING as a time period long
enough that pending retry faults may still come in after the page table
update, to skip duplicate retry faults of the same range.

If the difference between the system timestamp and the range's last
validate timestamp is bigger than AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING,
that means the retry fault is from another GPU, so continue to handle the
retry fault recovery.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 ++
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 61bf2df38e72..adb79b10f874 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -34,6 +34,11 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
+/* Long enough to ensure no retry fault comes after svm range is restored and
+ * page table is updated.
+ */
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING   2000
+
 static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -268,6 +273,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>deferred_list);
INIT_LIST_HEAD(>child_list);
atomic_set(>invalid, 0);
+   prange->validate_timestamp = ktime_to_us(ktime_get());
mutex_init(>migrate_mutex);
mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
@@ -1358,6 +1364,9 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
 unreserve_out:
svm_range_unreserve_bos();
 
+   if (!r)
+   prange->validate_timestamp = ktime_to_us(ktime_get());
+
return r;
 }
 
@@ -2098,6 +2107,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
+   uint64_t timestamp;
int32_t best_loc, gpuidx;
int r = 0;
 
@@ -2133,6 +2143,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
 
mutex_lock(>migrate_mutex);
+   timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
+   /* skip duplicate vm fault on different pages of same range */
+   if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+   pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
+svms, prange->start, prange->last);
+   goto out_unlock_range;
+   }
 
best_loc = svm_range_best_restore_location(prange, adev, );
if (best_loc == -1) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index f157be434cfa..af853726b861 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -85,6 +85,7 @@ struct svm_work_list_item {
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
  * @invalid:not 0 means cpu page table is invalidated
+ * @validate_timestamp: system timestamp when range is validated
  * @notifier:   register mmu interval notifier
  * @work_item:  deferred work item information
  * @deferred_list: list header used to add range to deferred list
@@ -121,6 +122,7 @@ struct svm_range {
uint32_tactual_loc;
uint8_t granularity;
atomic_tinvalid;
+   uint64_tvalidate_timestamp;
struct mmu_interval_notifiernotifier;
struct svm_work_list_item   work_item;
struct list_headdeferred_list;
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 29/34] drm/amdgpu: reserve fence slot to update page table

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

We forgot to reserve a fence slot before using sdma to update the page
table, causing the kernel BUG backtrace below when handling a vm retry
fault while the application is exiting.
[  133.048143] kernel BUG at 
/home/yangp/git/compute_staging/kernel/drivers/dma-buf/dma-resv.c:281!
[  133.048487] Workqueue: events amdgpu_irq_handle_ih1 [amdgpu]
[  133.048506] RIP: 0010:dma_resv_add_shared_fence+0x204/0x280
[  133.048672]  amdgpu_vm_sdma_commit+0x134/0x220 [amdgpu]
[  133.048788]  amdgpu_vm_bo_update_range+0x220/0x250 [amdgpu]
[  133.048905]  amdgpu_vm_handle_fault+0x202/0x370 [amdgpu]
[  133.049031]  gmc_v9_0_process_interrupt+0x1ab/0x310 [amdgpu]
[  133.049165]  ? kgd2kfd_interrupt+0x9a/0x180 [amdgpu]
[  133.049289]  ? amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049408]  amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049534]  amdgpu_ih_process+0x9b/0x1c0 [amdgpu]
[  133.049657]  amdgpu_irq_handle_ih1+0x21/0x60 [amdgpu]
[  133.049669]  process_one_work+0x29f/0x640
[  133.049678]  worker_thread+0x39/0x3f0
[  133.049685]  ? process_one_work+0x640/0x640

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 83c020411e52..7f696f5c55a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3302,7 +3302,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
struct amdgpu_bo *root;
uint64_t value, flags;
struct amdgpu_vm *vm;
-   long r;
+   int r;
bool is_compute_context = false;
 
spin_lock(>vm_manager.pasid_lock);
@@ -3360,6 +3360,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
value = 0;
}
 
+   r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
+   if (r) {
+   pr_debug("failed %d to reserve fence slot\n", r);
+   goto error_unlock;
+   }
+
r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
addr, flags, value, NULL, NULL,
NULL);
@@ -3371,7 +3377,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
 error_unlock:
amdgpu_bo_unreserve(root);
if (r < 0)
-   DRM_ERROR("Can't handle page fault (%ld)\n", r);
+   DRM_ERROR("Can't handle page fault (%d)\n", r);
 
 error_unref:
amdgpu_bo_unref();
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 26/34] drm/amdkfd: add svm_bo eviction mechanism support

2021-03-31 Thread Felix Kuehling
svm_bo eviction mechanism is different from regular BOs.
Every SVM_BO created contains one eviction fence and one
worker item for eviction process.
SVM_BOs can be attached to one or more pranges.
For SVM_BO eviction mechanism, TTM will start to call
enable_signal callback for every SVM_BO until VRAM space
is available.
Here, all the ttm_evict calls are synchronous; this guarantees
that each eviction has completed and the fence has signaled before
it returns.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 201 +--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  13 +-
 2 files changed, 168 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7b2c97b7f48c..77da6c68fab2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -34,6 +34,7 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
+static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
@@ -319,7 +320,15 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(_bo->list_lock);
}
spin_unlock(_bo->list_lock);
-
+   if (!dma_fence_is_signaled(_bo->eviction_fence->base)) {
+   /* We're not in the eviction worker.
+* Signal the fence and synchronize with any
+* pending eviction work.
+*/
+   dma_fence_signal(_bo->eviction_fence->base);
+   cancel_work_sync(_bo->eviction_work);
+   }
+   dma_fence_put(_bo->eviction_fence->base);
amdgpu_bo_unref(_bo->bo);
kfree(svm_bo);
 }
@@ -332,6 +341,61 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
kref_put(_bo->kref, svm_range_bo_release);
 }
 
+static bool svm_range_validate_svm_bo(struct svm_range *prange)
+{
+   mutex_lock(>lock);
+   if (!prange->svm_bo) {
+   mutex_unlock(>lock);
+   return false;
+   }
+   if (prange->ttm_res) {
+   /* We still have a reference, all is well */
+   mutex_unlock(>lock);
+   return true;
+   }
+   if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+   if (READ_ONCE(prange->svm_bo->evicting)) {
+   struct dma_fence *f;
+   struct svm_range_bo *svm_bo;
+   /* The BO is getting evicted,
+* we need to get a new one
+*/
+   mutex_unlock(>lock);
+   svm_bo = prange->svm_bo;
+   f = dma_fence_get(_bo->eviction_fence->base);
+   svm_range_bo_unref(prange->svm_bo);
+   /* wait for the fence to avoid long spin-loop
+* at list_empty_careful
+*/
+   dma_fence_wait(f, false);
+   dma_fence_put(f);
+   } else {
+   /* The BO was still around and we got
+* a new reference to it
+*/
+   mutex_unlock(>lock);
+   pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+
+   prange->ttm_res = >svm_bo->bo->tbo.mem;
+   return true;
+   }
+
+   } else {
+   mutex_unlock(>lock);
+   }
+
+   /* We need a new svm_bo. Spin-loop to wait for concurrent
+* svm_range_bo_release to finish removing this range from
+* its range list. After this, it is safe to reuse the
+* svm_bo pointer and svm_bo_list head.
+*/
+   while (!list_empty_careful(>svm_bo_list))
+   ;
+
+   return false;
+}
+
 static struct svm_range_bo *svm_range_bo_new(void)
 {
struct svm_range_bo *svm_bo;
@@ -351,72 +415,56 @@ int
 svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bool clear)
 {
-   struct amdkfd_process_info *process_info;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
struct amdgpu_bo *bo;
struct kfd_process *p;
+   struct mm_struct *mm;
int r;
 
-   pr_debug("[0x%lx 0x%lx]\n", prange->start, prange->last);
-   mutex_lock(>lock);
-   if (prange->svm_bo) {
-   if (prange->ttm_res) {
-   /* We still have a reference, all is well */
-   mutex_unlock(>lock);
-   return 0;
-   }
-   if (svm_bo_ref_unless_zero(prange->svm_bo)) {
-

[PATCH 28/34] drm/amdgpu: add svm_bo eviction to enable_signal cb

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Add to amdgpu_amdkfd_fence.enable_signal callback, support
for svm_bo fence eviction.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 53559643c712..1fe233cddb20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
 
 static const struct dma_fence_ops amdkfd_fence_ops;
 static atomic_t fence_seq = ATOMIC_INIT(0);
@@ -123,9 +124,13 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence 
*f)
if (dma_fence_is_signaled(f))
return true;
 
-   if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
-   return true;
-
+   if (!fence->svm_bo) {
+   if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+   return true;
+   } else {
+   if (!svm_range_schedule_evict_svm_bo(fence))
+   return true;
+   }
return false;
 }
 
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 27/34] drm/amdgpu: svm bo enable_signal call condition

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

[why]
To support svm bo eviction mechanism.

[how]
If the BO created has the AMDGPU_AMDKFD_CREATE_SVM_BO flag set, the
enable_signal callback will be called inside amdgpu_evict_flags.
This also causes gutting of the BO by removing all placements,
so that TTM won't actually do an eviction. Instead it will discard
the memory held by the BO. This is needed for HMM migration to user
mode system memory pages.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a2585058e65d..17e0f3b60c18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -111,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object 
*bo,
}
 
abo = ttm_to_amdgpu_bo(bo);
+   if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+   struct dma_fence *fence;
+   struct dma_resv *resv = >base._resv;
+
+   rcu_read_lock();
+   fence = rcu_dereference(resv->fence_excl);
+   if (fence && !fence->ops->signaled)
+   dma_fence_enable_sw_signaling(fence);
+
+   placement->num_placement = 0;
+   placement->num_busy_placement = 0;
+   rcu_read_unlock();
+   return;
+   }
switch (bo->mem.mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 25/34] drm/amdgpu: add param bit flag to create SVM BOs

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Add CREATE_SVM_BO define bit for SVM BOs.
Another define flag was moved to concentrate these
KFD type flags in one include file.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h   | 4 
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9af644f256e9..bc38de8c5c38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -33,9 +33,6 @@
 #include 
 #include "amdgpu_xgmi.h"
 
-/* BO flag to indicate a KFD userptr BO */
-#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
-
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
@@ -217,7 +214,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo 
*bo)
u32 domain = bo->preferred_domains;
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
 
-   if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+   if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
domain = AMDGPU_GEM_DOMAIN_CPU;
sg = false;
}
@@ -1278,7 +1275,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
-   bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
+   bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
 
(*mem)->va = va;
(*mem)->domain = domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 25411b2c4dd9..b07903d317e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -37,6 +37,10 @@
 #define AMDGPU_BO_INVALID_OFFSET   LONG_MAX
 #define AMDGPU_BO_MAX_PLACEMENTS   3
 
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO(1ULL << 63)
+#define AMDGPU_AMDKFD_CREATE_SVM_BO(1ULL << 62)
+
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 
 struct amdgpu_bo_param {
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 24/34] drm/amdkfd: add svm_bo reference for eviction fence

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

[why]
As part of the SVM functionality, the eviction mechanism used for
SVM_BOs is different. This mechanism uses one eviction fence per prange,
instead of one fence per kfd_process.

[how]
A svm_bo reference to amdgpu_amdkfd_fence to allow differentiate between
SVM_BO or regular BO evictions. This also include modifications to set the
reference at the fence creation call.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 --
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 14f68c028126..beb2ef070a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
+   struct svm_range_bo *svm_bo;
 };
 
 struct amdgpu_kfd_dev {
@@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct 
amdgpu_device *adev,
int queue_bit);
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-   struct 
mm_struct *mm);
+   struct mm_struct *mm,
+   struct svm_range_bo *svm_bo);
 #if IS_ENABLED(CONFIG_HSA_AMD)
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 5af464933976..53559643c712 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -60,7 +60,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
  */
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-  struct mm_struct *mm)
+   struct mm_struct *mm,
+   struct svm_range_bo *svm_bo)
 {
struct amdgpu_amdkfd_fence *fence;
 
@@ -73,7 +74,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 
context,
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
spin_lock_init(&fence->lock);
-
+   fence->svm_bo = svm_bo;
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
   context, atomic_inc_return(&fence_seq));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e93850f2f3b1..9af644f256e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -970,7 +970,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void 
**process_info,
 
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
-  current->mm);
+  current->mm,
+  NULL);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
@@ -2188,7 +2189,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence **ef)
 */
new_fence = amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
-   process_info->eviction_fence->mm);
+   process_info->eviction_fence->mm,
+   NULL);
if (!new_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 23/34] drm/amdkfd: SVM API call to restore page tables

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Use SVM API to restore page tables when retry fault and
compute context are enabled.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7e306fd20de4..83c020411e52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,6 +37,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "kfd_svm.h"
 
 /**
  * DOC: GPUVM
@@ -3302,18 +3303,29 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
uint64_t value, flags;
struct amdgpu_vm *vm;
long r;
+   bool is_compute_context = false;
 
spin_lock(>vm_manager.pasid_lock);
vm = idr_find(>vm_manager.pasid_idr, pasid);
-   if (vm)
+   if (vm) {
root = amdgpu_bo_ref(vm->root.base.bo);
-   else
+   is_compute_context = vm->is_compute_context;
+   } else {
root = NULL;
+   }
spin_unlock(>vm_manager.pasid_lock);
 
if (!root)
return false;
 
+   addr /= AMDGPU_GPU_PAGE_SIZE;
+
+   if (is_compute_context &&
+   !svm_range_restore_pages(adev, pasid, addr)) {
+   amdgpu_bo_unref(&root);
+   return true;
+   }
+
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
@@ -3327,18 +3339,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
if (!vm)
goto error_unlock;
 
-   addr /= AMDGPU_GPU_PAGE_SIZE;
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
 
-   if (vm->is_compute_context) {
+   if (is_compute_context) {
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
AMDGPU_PTE_TF;
value = 0;
-
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 22/34] drm/amdkfd: page table restore through svm API

2021-03-31 Thread Felix Kuehling
Page table restore implementation in SVM API. This is called from
the fault handler at amdgpu_vm. To update page tables through
the page fault retry IH.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 59 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 +
 2 files changed, 61 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7da58a4214c0..7b2c97b7f48c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1964,6 +1964,65 @@ svm_range_from_addr(struct svm_range_list *svms, 
unsigned long addr,
return NULL;
 }
 
+int
+svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr)
+{
+   int r = 0;
+   struct mm_struct *mm = NULL;
+   struct svm_range *prange;
+   struct svm_range_list *svms;
+   struct kfd_process *p;
+
+   p = kfd_lookup_process_by_pasid(pasid);
+   if (!p) {
+   pr_debug("kfd process not founded pasid 0x%x\n", pasid);
+   return -ESRCH;
+   }
+   if (!p->xnack_enabled) {
+   pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+   return -EFAULT;
+   }
+   svms = &p->svms;
+
+   pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+
+   mm = get_task_mm(p->lead_thread);
+   if (!mm) {
+   pr_debug("svms 0x%p failed to get mm\n", svms);
+   r = -ESRCH;
+   goto out;
+   }
+
+   mmap_read_lock(mm);
+   mutex_lock(&svms->lock);
+   prange = svm_range_from_addr(svms, addr, NULL);
+
+   if (!prange) {
+   pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
+svms, addr);
+   r = -EFAULT;
+   goto out_unlock_svms;
+   }
+
+   mutex_lock(&prange->migrate_mutex);
+
+   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, 
false);
+   if (r)
+   pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r,
+svms, prange->start, prange->last);
+
+   mutex_unlock(&prange->migrate_mutex);
+out_unlock_svms:
+   mutex_unlock(&svms->lock);
+   mmap_read_unlock(mm);
+   mmput(mm);
+out:
+   kfd_unref_process(p);
+
+   return r;
+}
+
 void svm_range_list_fini(struct kfd_process *p)
 {
struct svm_range *prange;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 6cc12de8d76a..168c623f4477 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -154,6 +154,8 @@ void svm_range_vram_node_free(struct svm_range *prange);
 int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
   unsigned long addr, struct svm_range *parent,
   struct svm_range *prange);
+int svm_range_restore_pages(struct amdgpu_device *adev,
+   unsigned int pasid, uint64_t addr);
 void svm_range_add_list_work(struct svm_range_list *svms,
 struct svm_range *prange, struct mm_struct *mm,
 enum svm_work_list_ops op);
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 20/34] drm/amdkfd: invalidate tables on page retry fault

2021-03-31 Thread Felix Kuehling
GPU page tables are invalidated by unmapping prange directly at
the mmu notifier, when page fault retry is enabled through
amdgpu_noretry global parameter. The restore page table is
performed at the page fault handler.

If xnack is on, we update GPU mappings after migration to avoid
unnecessary GPUVM faults.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 79 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  4 +-
 3 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 81bae0adc0cf..552c4f656e2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -807,7 +807,11 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
 prange, prange->start, prange->last);
 
-   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+   if (p->xnack_enabled && parent == prange)
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
+   else
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
svm_range_add_list_work(&p->svms, parent, mm, op);
schedule_deferred_list_work(&p->svms);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a83a7242c760..7da58a4214c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -912,6 +912,13 @@ svm_range_split_by_granularity(struct kfd_process *p, 
struct mm_struct *mm,
svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
}
 
+   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+   if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
+   prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
+   pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
+prange, prange->start, prange->last,
+SVM_OP_ADD_RANGE_AND_MAP);
+   }
return 0;
 }
 
@@ -1418,25 +1425,54 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
unsigned long start, unsigned long last)
 {
struct svm_range_list *svms = prange->svms;
-   int invalid, evicted_ranges;
+   struct kfd_process *p;
int r = 0;
 
-   invalid = atomic_inc_return(&prange->invalid);
-   evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
-   if (evicted_ranges != 1)
-   return r;
+   p = container_of(svms, struct kfd_process, svms);
 
-   pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
-prange->svms, prange->start, prange->last);
+   pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+svms, prange->start, prange->last, start, last);
 
-   /* First eviction, stop the queues */
-   r = kgd2kfd_quiesce_mm(mm);
-   if (r)
-   pr_debug("failed to quiesce KFD\n");
+   if (!p->xnack_enabled) {
+   int invalid, evicted_ranges;
 
-   pr_debug("schedule to restore svm %p ranges\n", svms);
-   schedule_delayed_work(&svms->restore_work,
-   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+   invalid = atomic_inc_return(&prange->invalid);
+   evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+   if (evicted_ranges != 1)
+   return r;
+
+   pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+
+   /* First eviction, stop the queues */
+   r = kgd2kfd_quiesce_mm(mm);
+   if (r)
+   pr_debug("failed to quiesce KFD\n");
+
+   pr_debug("schedule to restore svm %p ranges\n", svms);
+   schedule_delayed_work(&svms->restore_work,
+   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+   } else {
+   struct svm_range *pchild;
+   unsigned long s, l;
+
+   pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
+prange->svms, start, last);
+   svm_range_lock(prange);
+   list_for_each_entry(pchild, &prange->child_list, child_list) {
+   mutex_lock_nested(&pchild->lock, 1);
+   s = max(start, pchild->start);
+   l = min(last, pchild->last);
+   if (l >= s)
+   svm_range_unmap_from_gpus(pchild, s, l);
+   mutex_unlock(&pchild->lock);
+   }
+   s = max(start, prange->start);
+   l = min(last, prange->last);
+   if 

[PATCH 21/34] drm/amdgpu: enable 48-bit IH timestamp counter

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

By default this timestamp is 32 bit counter. It gets
overflowed in around 10 minutes.

Change-Id: I7c46604b0272dcfd1ce24351437c16fe53dca0ab
Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index ca8efa5c6978..2f17c8a57015 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct 
amdgpu_device *adev,
 
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+   tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
/* enable_intr field is only valid in ring0 */
if (ih == >irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 
0));
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 19/34] drm/amdkfd: HMM migrate vram to ram

2021-03-31 Thread Felix Kuehling
If CPU page fault happens, HMM pgmap_ops callback migrate_to_ram start
migrate memory from vram to ram in steps:

1. migrate_vma_pages get vram pages, and notify HMM to invalidate the
pages, HMM interval notifier callback evict process queues
2. Allocate system memory pages
3. Use svm copy memory to migrate data from vram to ram
4. migrate_vma_pages copy pages structure from vram pages to ram pages
5. Return VM_FAULT_SIGBUS if migration failed, to notify application
6. migrate_vma_finalize put vram pages, page_free callback free vram
pages and vram nodes
7. Restore work wait for migration is finished, then update GPU page
table mapping to system memory, and resume process queues

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 300 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 123 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  10 +
 4 files changed, 426 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index fcaf34096820..81bae0adc0cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -259,6 +259,35 @@ svm_migrate_put_vram_page(struct amdgpu_device *adev, 
unsigned long addr)
put_page(page);
 }
 
+static unsigned long
+svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
+{
+   unsigned long addr;
+
+   addr = page_to_pfn(page) << PAGE_SHIFT;
+   return (addr - adev->kfd.dev->pgmap.range.start);
+}
+
+static struct page *
+svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
+{
+   struct page *page;
+
+   page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+   if (page)
+   lock_page(page);
+
+   return page;
+}
+
+void svm_migrate_put_sys_page(unsigned long addr)
+{
+   struct page *page;
+
+   page = pfn_to_page(addr >> PAGE_SHIFT);
+   unlock_page(page);
+   put_page(page);
+}
 
 static int
 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
@@ -511,13 +540,213 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc)
 
 static void svm_migrate_page_free(struct page *page)
 {
+   /* Keep this function to avoid warning */
+}
+
+static int
+svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+   struct migrate_vma *migrate, struct dma_fence **mfence,
+   dma_addr_t *scratch)
+{
+   uint64_t npages = migrate->cpages;
+   struct device *dev = adev->dev;
+   uint64_t *src;
+   dma_addr_t *dst;
+   struct page *dpage;
+   uint64_t i = 0, j;
+   uint64_t addr;
+   int r = 0;
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last);
+
+   addr = prange->start << PAGE_SHIFT;
+
+   src = (uint64_t *)(scratch + npages);
+   dst = scratch;
+
+   for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+   struct page *spage;
+
+   spage = migrate_pfn_to_page(migrate->src[i]);
+   if (!spage) {
+   pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+   r = -ENOMEM;
+   goto out_oom;
+   }
+   src[i] = svm_migrate_addr(adev, spage);
+   if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+   r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+src + i - j, j,
+FROM_VRAM_TO_RAM,
+mfence);
+   if (r)
+   goto out_oom;
+   j = 0;
+   }
+
+   dpage = svm_migrate_get_sys_page(migrate->vma, addr);
+   if (!dpage) {
+   pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+   r = -ENOMEM;
+   goto out_oom;
+   }
+
+   dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, 
DMA_FROM_DEVICE);
+   r = dma_mapping_error(dev, dst[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   goto out_oom;
+   }
+
+   pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
+ dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
+
+   migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
+   migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+   }
+
+   r = 

[PATCH 18/34] drm/amdkfd: HMM migrate ram to vram

2021-03-31 Thread Felix Kuehling
Register svm range with same address and size but perferred_location
is changed from CPU to GPU or from GPU to CPU, trigger migration the svm
range from ram to vram or from vram to ram.

If svm range prefetch location is GPU with flags
KFD_IOCTL_SVM_FLAG_HOST_ACCESS, validate the svm range on ram first,
then migrate it from ram to vram.

After migrating to vram is done, CPU access will have cpu page fault,
page fault handler migrate it back to ram and resume cpu access.

Migration steps:

1. migrate_vma_pages get svm range ram pages, notify the
interval is invalidated and unmap from CPU page table, HMM interval
notifier callback evict process queues
2. Allocate new pages in vram using TTM
3. Use svm copy memory to sdma copy data from ram to vram
4. migrate_vma_pages copy ram pages structure to vram pages structure
5. migrate_vma_finalize put ram pages to free ram pages and memory
6. Restore work wait for migration is finished, then update GPUs page
table mapping to new vram pages, resume process queues

If migrate_vma_setup failed to collect all ram pages of range, retry 3
times until success to start migration.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 305 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 197 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +
 4 files changed, 500 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 2a6824ddae88..fcaf34096820 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -204,6 +204,311 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct 
dma_fence *mfence)
return r;
 }
 
+static uint64_t
+svm_migrate_node_physical_addr(struct amdgpu_device *adev,
+  struct drm_mm_node **mm_node, uint64_t *offset)
+{
+   struct drm_mm_node *node = *mm_node;
+   uint64_t pos = *offset;
+
+   if (node->start == AMDGPU_BO_INVALID_OFFSET) {
+   pr_debug("drm node is not validated\n");
+   return 0;
+   }
+
+   pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
+node->size);
+
+   if (pos >= node->size) {
+   do  {
+   pos -= node->size;
+   node++;
+   } while (pos >= node->size);
+
+   *mm_node = node;
+   *offset = pos;
+   }
+
+   return (node->start + pos) << PAGE_SHIFT;
+}
+
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
+{
+   return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
+}
+
+static void
+svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
+{
+   struct page *page;
+
+   page = pfn_to_page(pfn);
+   page->zone_device_data = prange;
+   get_page(page);
+   lock_page(page);
+}
+
+static void
+svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
+{
+   struct page *page;
+
+   page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
+   unlock_page(page);
+   put_page(page);
+}
+
+
+static int
+svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+struct migrate_vma *migrate, struct dma_fence **mfence,
+dma_addr_t *scratch)
+{
+   uint64_t npages = migrate->cpages;
+   struct device *dev = adev->dev;
+   struct drm_mm_node *node;
+   dma_addr_t *src;
+   uint64_t *dst;
+   uint64_t vram_addr;
+   uint64_t offset;
+   uint64_t i, j;
+   int r = -ENOMEM;
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last);
+
+   src = scratch;
+   dst = (uint64_t *)(scratch + npages);
+
+   r = svm_range_vram_node_new(adev, prange, true);
+   if (r) {
+   pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
+   goto out;
+   }
+
+   node = prange->ttm_res->mm_node;
+   offset = prange->offset;
+   vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset);
+   if (!vram_addr) {
+   WARN_ONCE(1, "vram node address is 0\n");
+   r = -ENOMEM;
+   goto out;
+   }
+
+   for (i = j = 0; i < npages; i++) {
+   struct page *spage;
+
+   dst[i] = vram_addr + (j << PAGE_SHIFT);
+   migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+   svm_migrate_get_vram_page(prange, migrate->dst[i]);
+
+   migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+   migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+
+   if (migrate->src[i] & MIGRATE_PFN_VALID) {
+   spage = migrate_pfn_to_page(migrate->src[i]);
+   

[PATCH 17/34] drm/amdkfd: copy memory through gart table

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Use sdma linear copy to migrate data between ram and vram. The sdma
linear copy command uses kernel buffer function queue to access system
memory through gart table.

Use reserved gart table window 0 to map system page address, and vram
page address is direct mapping. Use the same kernel buffer function to
fill in gart table mapping, so this is serialized with memory copy by
sdma job submit. We only need wait for the last memory copy sdma fence
for larger buffer migration.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 172 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   5 +
 2 files changed, 177 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 4bb39c562665..2a6824ddae88 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -32,6 +32,178 @@
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
 
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+   return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+   struct amdgpu_device *adev = ring->adev;
+   struct amdgpu_job *job;
+   unsigned int num_dw, num_bytes;
+   struct dma_fence *fence;
+   uint64_t src_addr, dst_addr;
+   uint64_t pte_flags;
+   void *cpu_addr;
+   int r;
+
+   /* use gart window 0 */
+   *gart_addr = adev->gmc.gart_start;
+
+   num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+   num_bytes = npages * 8;
+
+   r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+AMDGPU_IB_POOL_DELAYED, &job);
+   if (r)
+   return r;
+
+   src_addr = num_dw * 4;
+   src_addr += job->ibs[0].gpu_addr;
+
+   dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+   amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+   dst_addr, num_bytes, false);
+
+   amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+   WARN_ON(job->ibs[0].length_dw > num_dw);
+
+   pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+   pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+   if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+   pte_flags |= AMDGPU_PTE_WRITEABLE;
+   pte_flags |= adev->gart.gart_pte_flags;
+
+   cpu_addr = &job->ibs[0].ptr[num_dw];
+
+   r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+   if (r)
+   goto error_free;
+
+   r = amdgpu_job_submit(job, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+   if (r)
+   goto error_free;
+
+   dma_fence_put(fence);
+
+   return r;
+
+error_free:
+   amdgpu_job_free(job);
+   return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
+ *
+ * @adev: amdgpu device the sdma ring running
+ * @src: source page address array
+ * @dst: destination page address array
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence to signal after sdma is done
+ *
+ * ram address uses GART table continuous entries mapping to ram pages,
+ * vram address uses direct mapping of vram pages, which must have npages
+ * number of continuous pages.
+ * GART update and sdma uses same buf copy function ring, sdma is splited to
+ * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait 
for
+ * the last sdma finish fence which is returned to check copy memory is done.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+uint64_t *vram, uint64_t npages,
+enum MIGRATION_COPY_DIR direction,
+struct dma_fence **mfence)
+{
+   const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+   struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+   uint64_t gart_s, gart_d;
+   struct dma_fence *next;
+   uint64_t size;
+   int r;
+
+   mutex_lock(&adev->mman.gtt_window_lock);
+
+   while (npages) {
+   size = min(GTT_MAX_PAGES, npages);
+
+   if (direction == FROM_VRAM_TO_RAM) {
+   gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+   r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
+
+   } else if (direction == FROM_RAM_TO_VRAM) {
+   r = svm_migrate_gart_map(ring, size, sys, &gart_s,
+KFD_IOCTL_SVM_FLAG_GPU_RO);
+   gart_d = 

[PATCH 16/34] drm/amdkfd: support xgmi same hive mapping

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

amdgpu_gmc_get_vm_pte use bo_va->is_xgmi same hive information to set
pte flags to update GPU mapping. Add local structure variable bo_va, and
update bo_va.is_xgmi, pass it to mapping->bo_va while mapping to GPU.

Assuming xgmi pstate is hi after boot.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f01bc3b4010d..f557f67b9d2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -26,6 +26,8 @@
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_mn.h"
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
@@ -953,21 +955,27 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 struct svm_range *prange, dma_addr_t *dma_addr,
-struct dma_fence **fence)
+struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
+   struct amdgpu_bo_va bo_va;
uint64_t pte_flags;
int r = 0;
 
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
 prange->last);
 
+   if (prange->svm_bo && prange->ttm_res) {
+   bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
+   prange->mapping.bo_va = &bo_va;
+   }
+
prange->mapping.start = prange->start;
prange->mapping.last = prange->last;
prange->mapping.offset = prange->offset;
pte_flags = svm_range_get_pte_flags(adev, prange);
prange->mapping.flags = pte_flags;
 
-   r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL,
+   r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
prange->mapping.start,
prange->mapping.last, pte_flags,
prange->mapping.offset,
@@ -990,6 +998,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
*fence = dma_fence_get(vm->last_update);
 
 out:
+   prange->mapping.bo_va = NULL;
return r;
 }
 
@@ -997,12 +1006,18 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange,
 unsigned long *bitmap, bool wait)
 {
struct kfd_process_device *pdd;
+   struct amdgpu_device *bo_adev;
struct amdgpu_device *adev;
struct kfd_process *p;
struct dma_fence *fence = NULL;
uint32_t gpuidx;
int r = 0;
 
+   if (prange->svm_bo && prange->ttm_res)
+   bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+   else
+   bo_adev = NULL;
+
p = container_of(prange->svms, struct kfd_process, svms);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
@@ -1016,8 +1031,14 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange,
if (IS_ERR(pdd))
return -EINVAL;
 
+   if (bo_adev && adev != bo_adev &&
+   !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+   pr_debug("cannot map to device idx %d\n", gpuidx);
+   continue;
+   }
+
r = svm_range_map_to_gpu(adev, pdd->vm, prange,
-prange->dma_addr[gpuidx],
+prange->dma_addr[gpuidx], bo_adev,
wait ? &fence : NULL);
if (r)
break;
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 13/34] drm/amdkfd: add ioctl to configure and query xnack retries

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

Xnack retries are used for page fault recovery. Some AMD chip
families support continuously retry while page table entries are invalid.
The driver must handle the page fault interrupt and fill in a valid entry
for the GPU to continue.

This ioctl allows to enable/disable XNACK retries per KFD process.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 +++
 include/uapi/linux/kfd_ioctl.h   | 43 +++-
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9511826ac8ae..9838d0cd1f51 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1743,6 +1743,31 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(dev, >anon_fd);
 }
 
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+   struct kfd_process *p, void *data)
+{
+   struct kfd_ioctl_set_xnack_mode_args *args = data;
+   int r = 0;
+
+   mutex_lock(>mutex);
+   if (args->xnack_enabled >= 0) {
+   if (!list_empty(>pqm.queues)) {
+   pr_debug("Process has user queues running\n");
+   mutex_unlock(>mutex);
+   return -EBUSY;
+   }
+   if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
+   r = -EPERM;
+   else
+   p->xnack_enabled = args->xnack_enabled;
+   } else {
+   args->xnack_enabled = p->xnack_enabled;
+   }
+   mutex_unlock(>mutex);
+
+   return r;
+}
+
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
struct kfd_ioctl_svm_args *args = data;
@@ -1869,6 +1894,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
kfd_ioctl_smi_events, 0),
 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
+   kfd_ioctl_set_xnack_mode, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 247b57baa94f..3cb5b5dd9f77 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -597,6 +597,44 @@ struct kfd_ioctl_svm_args {
struct kfd_ioctl_svm_attribute attrs[0];
 };
 
+/**
+ * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode
+ *
+ * @xnack_enabled:   [in/out] Whether to enable XNACK mode for this process
+ *
+ * @xnack_enabled indicates whether recoverable page faults should be
+ * enabled for the current process. 0 means disabled, positive means
+ * enabled, negative means leave unchanged. If enabled, virtual address
+ * translations on GFXv9 and later AMD GPUs can return XNACK and retry
+ * the access until a valid PTE is available. This is used to implement
+ * device page faults.
+ *
+ * On output, @xnack_enabled returns the (new) current mode (0 or
+ * positive). Therefore, a negative input value can be used to query
+ * the current mode without changing it.
+ *
+ * The XNACK mode fundamentally changes the way SVM managed memory works
+ * in the driver, with subtle effects on application performance and
+ * functionality.
+ *
+ * Enabling XNACK mode requires shader programs to be compiled
+ * differently. Furthermore, not all GPUs support changing the mode
+ * per-process. Therefore changing the mode is only allowed while no
+ * user mode queues exist in the process. This ensure that no shader
+ * code is running that may be compiled for the wrong mode. And GPUs
+ * that cannot change to the requested mode will prevent the XNACK
+ * mode from occurring. All GPUs used by the process must be in the
+ * same XNACK mode.
+ *
+ * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM.
+ * Therefore those GPUs are not considered for the XNACK mode switch.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+struct kfd_ioctl_set_xnack_mode_args {
+   __s32 xnack_enabled;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)  _IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)   _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -699,7 +737,10 @@ struct kfd_ioctl_svm_args {
 
 #define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)
 
+#define AMDKFD_IOC_SET_XNACK_MODE  \
+   AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
+
 #define AMDKFD_COMMAND_START   0x01
-#define AMDKFD_COMMAND_END 0x21
+#define AMDKFD_COMMAND_END 0x22
 
 #endif
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 15/34] drm/amdkfd: validate vram svm range from TTM

2021-03-31 Thread Felix Kuehling
If svm range perfetch location is not zero, use TTM to alloc
amdgpu_bo vram nodes to validate svm range, then map vram nodes to GPUs.

Use offset to sub allocate from the same amdgpu_bo to handle overlap
vram range while adding new range or unmapping range.

svm_bo has ref count to trace the shared ranges. If all ranges of shared
amdgpu_bo are migrated to ram, ref count becomes 0, then amdgpu_bo is
released, and the svm_bo pointer of all ranges is set to NULL.

To migrate range from ram back to vram, allocate the same amdgpu_bo
with previous offset if the range has svm_bo.

If a prange migrates to VRAM, no CPU mapping exists, so process exit will
not have an unmap callback for this prange to free the prange and svm bo. Free
outstanding pranges from svms list before process is freed in
svm_range_list_fini.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 293 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  20 ++
 2 files changed, 306 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b0e0b243754c..f01bc3b4010d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -44,7 +44,8 @@ static const struct mmu_interval_notifier_ops 
svm_range_mn_ops = {
  * svm_range_unlink - unlink svm_range from lists and interval tree
  * @prange: svm range structure to be removed
  *
- * Remove the svm range from svms interval tree and link list
+ * Remove the svm_range from the svms and svm_bo lists and the svms
+ * interval tree.
  *
  * Context: The caller must hold svms->lock
  */
@@ -53,6 +54,12 @@ static void svm_range_unlink(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
 prange, prange->start, prange->last);
 
+   if (prange->svm_bo) {
+   spin_lock(>svm_bo->list_lock);
+   list_del(>svm_bo_list);
+   spin_unlock(>svm_bo->list_lock);
+   }
+
list_del(>list);
if (prange->it_node.start != 0 && prange->it_node.last != 0)
interval_tree_remove(>it_node, >svms->objects);
@@ -217,6 +224,7 @@ static void svm_range_free(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   svm_range_vram_node_free(prange);
svm_range_free_dma_mappings(prange);
mutex_destroy(>lock);
kfree(prange);
@@ -251,6 +259,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>update_list);
INIT_LIST_HEAD(>remove_list);
INIT_LIST_HEAD(>insert_list);
+   INIT_LIST_HEAD(>svm_bo_list);
INIT_LIST_HEAD(>deferred_list);
INIT_LIST_HEAD(>child_list);
atomic_set(>invalid, 0);
@@ -264,6 +273,210 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
return prange;
 }
 
+static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
+{
+   if (!svm_bo || !kref_get_unless_zero(_bo->kref))
+   return false;
+
+   return true;
+}
+
+static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+{
+   if (svm_bo)
+   kref_get(_bo->kref);
+
+   return svm_bo;
+}
+
+static void svm_range_bo_release(struct kref *kref)
+{
+   struct svm_range_bo *svm_bo;
+
+   svm_bo = container_of(kref, struct svm_range_bo, kref);
+   spin_lock(_bo->list_lock);
+   while (!list_empty(_bo->range_list)) {
+   struct svm_range *prange =
+   list_first_entry(_bo->range_list,
+   struct svm_range, svm_bo_list);
+   /* list_del_init tells a concurrent svm_range_vram_node_new when
+* it's safe to reuse the svm_bo pointer and svm_bo_list head.
+*/
+   list_del_init(>svm_bo_list);
+   spin_unlock(_bo->list_lock);
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+prange->start, prange->last);
+   mutex_lock(>lock);
+   prange->svm_bo = NULL;
+   mutex_unlock(>lock);
+
+   spin_lock(_bo->list_lock);
+   }
+   spin_unlock(_bo->list_lock);
+
+   amdgpu_bo_unref(_bo->bo);
+   kfree(svm_bo);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+   if (!svm_bo)
+   return;
+
+   kref_put(_bo->kref, svm_range_bo_release);
+}
+
+static struct svm_range_bo *svm_range_bo_new(void)
+{
+   struct svm_range_bo *svm_bo;
+
+   svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL);
+   if (!svm_bo)
+   return NULL;
+
+   kref_init(_bo->kref);
+   INIT_LIST_HEAD(_bo->range_list);
+   spin_lock_init(_bo->list_lock);
+
+   return svm_bo;
+}
+
+int

[PATCH 14/34] drm/amdkfd: register HMM device private zone

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Register vram memory as MEMORY_DEVICE_PRIVATE type resource, to
allocate vram backing pages for page migration.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/Kconfig   |   1 +
 drivers/gpu/drm/amd/amdkfd/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c  |   4 +
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 103 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |  48 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   3 +
 6 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig 
b/drivers/gpu/drm/amd/amdkfd/Kconfig
index f02c938f75da..7880fc101a3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -8,6 +8,7 @@ config HSA_AMD
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select HMM_MIRROR
+   select DEVICE_PRIVATE
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
help
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 387ce0217d35..a93301dbc464 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -55,7 +55,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o \
-   $(AMDKFD_PATH)/kfd_svm.o
+   $(AMDKFD_PATH)/kfd_svm.o \
+   $(AMDKFD_PATH)/kfd_migrate.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f860cd705961..918aa03a9e27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -30,6 +30,7 @@
 #include "kfd_iommu.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
+#include "kfd_migrate.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -814,6 +815,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
kfd_cwsr_init(kfd);
 
+   svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+
if (kfd_resume(kfd))
goto kfd_resume_error;
 
@@ -862,6 +865,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 {
if (kfd->init_complete) {
kgd2kfd_suspend(kfd, false);
+   svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
device_queue_manager_uninit(kfd->dqm);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
new file mode 100644
index ..4bb39c562665
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mn.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+
+static void svm_migrate_page_free(struct page *page)
+{
+}
+
+/**
+ * svm_migrate_to_ram - CPU page fault handler
+ * @vmf: CPU vm fault vma, address
+ *
+ * Context: vm fault handler, mm->mmap_sem is taken
+ *
+ * Return:
+ * 0 - OK
+ * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault
+ */
+static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+{
+   return VM_FAULT_SIGBUS;
+}
+
+static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
+   .page_free  = svm_migrate_page_free,
+   .migrate_to_ram = svm_migrate_to_ram,
+};
+
+int svm_migrate_init(struct 

[PATCH 12/34] drm/amdkfd: add xnack enabled flag to kfd_process

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

XNACK mode controls the SQ RETRY_DISABLE setting that determines,
whether recoverable page faults can be supported on GFXv9 hardware.
Only on Aldebaran we can support different processes running with
different XNACK modes. On older chips all processes must use the same
RETRY_DISABLE setting. However, processes not relying on recoverable
page faults can work with RETRY enabled. This means XNACK off is always
available as a fallback so we can use the same mode on all GPUs in a
process.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  | 13 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  4 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 45 +++
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index eca6331efa94..b5c3d13643f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
-   if (dqm->dev->noretry &&
-   !dqm->dev->use_iommu_v2)
+
+   if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
+   /* Aldebaran can safely support different XNACK modes
+* per process
+*/
+   if (!pdd->process->xnack_enabled)
+   qpd->sh_mem_config |=
+   1 << 
SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+   } else if (dqm->dev->noretry &&
+  !dqm->dev->use_iommu_v2) {
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+   }
 
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 1b829eef9e50..5c0efaaebd8c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -824,6 +824,8 @@ struct kfd_process {
/* shared virtual memory registered by this process */
struct svm_range_list svms;
bool svm_disabled;
+
+   bool xnack_enabled;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -883,6 +885,8 @@ struct kfd_process_device 
*kfd_get_process_device_data(struct kfd_dev *dev,
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
 
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
+
 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
  struct vm_area_struct *vma);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3bcde43ccd70..f897c1d0ea66 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1193,6 +1193,48 @@ void kfd_process_set_trap_handler(struct 
qcm_process_device *qpd,
}
 }
 
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
+{
+   int i;
+
+   /* On most GFXv9 GPUs, the retry mode in the SQ must match the
+* boot time retry setting. Mixing processes with different
+* XNACK/retry settings can hang the GPU.
+*
+* Different GPUs can have different noretry settings depending
+* on HW bugs or limitations. We need to find at least one
+* XNACK mode for this process that's compatible with all GPUs.
+* Fortunately GPUs with retry enabled (noretry=0) can run code
+* built for XNACK-off. On GFXv9 it may perform slower.
+*
+* Therefore applications built for XNACK-off can always be
+* supported and will be our fallback if any GPU does not
+* support retry.
+*/
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_dev *dev = p->pdds[i]->dev;
+
+   /* Only consider GFXv9 and higher GPUs. Older GPUs don't
+* support the SVM APIs and don't need to be considered
+* for the XNACK mode selection.
+*/
+   if (dev->device_info->asic_family < CHIP_VEGA10)
+   continue;
+   /* Aldebaran can always support XNACK because it can support
+* per-process XNACK mode selection. But let the dev->noretry
+* setting still influence the default XNACK mode.
+*/
+   if (supported &&
+   

[PATCH 11/34] drm/amdgpu: Enable retry faults unconditionally on Aldebaran

2021-03-31 Thread Felix Kuehling
This is needed to allow per-process XNACK mode selection in the SQ when
booting with XNACK off by default.

Signed-off-by: Felix Kuehling 
Reviewed-by: Philip Yang 
Tested-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 8 ++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 6 --
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4bcc03c4c6c5..09f88874bf56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -640,7 +640,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
 
 /**
  * DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
  * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
  */
 MODULE_PARM_DESC(noretry,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 5bb9856bd8a9..f2fb2cac5c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -292,10 +292,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct 
amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
-   /* Send no-retry XNACK on fault to suppress VM fault storm. */
+   /* Send no-retry XNACK on fault to suppress VM fault storm.
+* On Aldebaran, XNACK can be enabled in the SQ per-process.
+* Retry faults need to be enabled for that to work.
+*/
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-   !adev->gmc.noretry);
+   !adev->gmc.noretry ||
+   adev->asic_type == CHIP_ALDEBARAN);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, 
mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 29d7f50912ee..b9d789a9e49e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -298,10 +298,12 @@ static void mmhub_v1_7_setup_vmid_config(struct 
amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
-   /* Send no-retry XNACK on fault to suppress VM fault storm. */
+   /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+* Retry faults need to be enabled for that to work.
+*/
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-   !adev->gmc.noretry);
+   1);
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, 
regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 10/34] drm/amdkfd: svm range eviction and restore

2021-03-31 Thread Felix Kuehling
The HMM interval notifier callback notifies that the CPU page table will be
updated. Stop process queues if the updated address belongs to a svm range
registered in the process svms objects tree. Schedule restore work to
update the GPU page table using the new page addresses in the updated svm range.

The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 137 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   2 +
 4 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0d19a13fc227..1b829eef9e50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -738,6 +738,8 @@ struct svm_range_list {
struct work_struct  deferred_list_work;
struct list_headdeferred_range_list;
spinlock_t  deferred_list_lock;
+   atomic_tevicted_ranges;
+   struct delayed_work restore_work;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1f3d4f5c64a8..3bcde43ccd70 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct 
mmu_notifier *mn,
 
cancel_delayed_work_sync(>eviction_work);
cancel_delayed_work_sync(>restore_work);
+   cancel_delayed_work_sync(>svms.restore_work);
 
mutex_lock(>mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 4736fe996feb..b0e0b243754c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -21,6 +21,7 @@
  */
 
 #include 
+#include 
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -28,6 +29,8 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
@@ -250,6 +253,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>insert_list);
INIT_LIST_HEAD(>deferred_list);
INIT_LIST_HEAD(>child_list);
+   atomic_set(>invalid, 0);
mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
@@ -964,6 +968,129 @@ svm_range_list_lock_and_flush_work(struct svm_range_list 
*svms,
goto retry_flush_work;
 }
 
+static void svm_range_restore_work(struct work_struct *work)
+{
+   struct delayed_work *dwork = to_delayed_work(work);
+   struct amdkfd_process_info *process_info;
+   struct svm_range_list *svms;
+   struct svm_range *prange;
+   struct kfd_process *p;
+   struct mm_struct *mm;
+   int evicted_ranges;
+   int invalid;
+   int r;
+
+   svms = container_of(dwork, struct svm_range_list, restore_work);
+   evicted_ranges = atomic_read(>evicted_ranges);
+   if (!evicted_ranges)
+   return;
+
+   pr_debug("restore svm ranges\n");
+
+   /* kfd_process_notifier_release destroys this worker thread. So during
+* the lifetime of this thread, kfd_process and mm will be valid.
+*/
+   p = container_of(svms, struct kfd_process, svms);
+   process_info = p->kgd_process_info;
+   mm = p->mm;
+   if (!mm)
+   return;
+
+   mutex_lock(_info->lock);
+   svm_range_list_lock_and_flush_work(svms, mm);
+   mutex_lock(>lock);
+
+   evicted_ranges = atomic_read(>evicted_ranges);
+
+   list_for_each_entry(prange, >list, list) {
+   invalid = atomic_read(>invalid);
+   if (!invalid)
+   continue;
+
+   pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+prange->svms, prange, prange->start, prange->last,
+invalid);
+
+   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+  false, true);
+   if (r) {
+   pr_debug("failed %d to map 0x%lx to gpus\n", r,
+prange->start);
+   goto unlock_out;
+   }
+
+   if (atomic_cmpxchg(>invalid, invalid, 0) != invalid)
+   goto unlock_out;
+   }
+
+   if (atomic_cmpxchg(>evicted_ranges, evicted_ranges, 0) !=
+   evicted_ranges)
+   goto unlock_out;
+
+   evicted_ranges 

[PATCH 09/34] drm/amdkfd: map svm range to GPUs

2021-03-31 Thread Felix Kuehling
Use amdgpu_vm_bo_update_mapping to update GPU page table to map or unmap
svm range system memory pages address to GPUs.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 478 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 +
 2 files changed, 479 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index ddb1e2a29881..4736fe996feb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -98,11 +98,123 @@ static void svm_range_remove_notifier(struct svm_range 
*prange)
mmu_interval_notifier_remove(>notifier);
 }
 
+static int
+svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
+ unsigned long *hmm_pfns, uint64_t npages)
+{
+   enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+   dma_addr_t *addr = *dma_addr;
+   struct page *page;
+   int i, r;
+
+   if (!addr) {
+   addr = kvmalloc_array(npages, sizeof(*addr),
+ GFP_KERNEL | __GFP_ZERO);
+   if (!addr)
+   return -ENOMEM;
+   *dma_addr = addr;
+   }
+
+   for (i = 0; i < npages; i++) {
+   if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
+ "leaking dma mapping\n"))
+   dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
+
+   page = hmm_pfn_to_page(hmm_pfns[i]);
+   addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+   r = dma_mapping_error(dev, addr[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   return r;
+   }
+   pr_debug("dma mapping 0x%llx for page addr 0x%lx\n",
+addr[i] >> PAGE_SHIFT, page_to_pfn(page));
+   }
+   return 0;
+}
+
+static int
+svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+ unsigned long *hmm_pfns)
+{
+   struct kfd_process *p;
+   uint32_t gpuidx;
+   int r;
+
+   p = container_of(prange->svms, struct kfd_process, svms);
+
+   for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+   struct kfd_process_device *pdd;
+   struct amdgpu_device *adev;
+
+   pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+   pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+   if (!pdd) {
+   pr_debug("failed to find device idx %d\n", gpuidx);
+   return -EINVAL;
+   }
+   adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+   r = svm_range_dma_map_dev(adev->dev, >dma_addr[gpuidx],
+ hmm_pfns, prange->npages);
+   if (r)
+   break;
+   }
+
+   return r;
+}
+
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+unsigned long offset, unsigned long npages)
+{
+   enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+   int i;
+
+   if (!dma_addr)
+   return;
+
+   for (i = offset; i < offset + npages; i++) {
+   if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
+   continue;
+   pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
+   dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
+   dma_addr[i] = 0;
+   }
+}
+
+static void svm_range_free_dma_mappings(struct svm_range *prange)
+{
+   struct kfd_process_device *pdd;
+   dma_addr_t *dma_addr;
+   struct device *dev;
+   struct kfd_process *p;
+   uint32_t gpuidx;
+
+   p = container_of(prange->svms, struct kfd_process, svms);
+
+   for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+   dma_addr = prange->dma_addr[gpuidx];
+   if (!dma_addr)
+   continue;
+
+   pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+   if (!pdd) {
+   pr_debug("failed to find device idx %d\n", gpuidx);
+   continue;
+   }
+   dev = >dev->pdev->dev;
+   svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+   kvfree(dma_addr);
+   prange->dma_addr[gpuidx] = NULL;
+   }
+}
+
 static void svm_range_free(struct svm_range *prange)
 {
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   svm_range_free_dma_mappings(prange);
mutex_destroy(>lock);
kfree(prange);
 }
@@ -148,6 +260,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
return prange;
 }
 
+static int svm_range_bo_validate(void 

[PATCH 08/34] drm/amdgpu: export vm update mapping interface

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

It will be used by KFD to map a svm range to the GPU. Because a svm range
does not have an amdgpu_bo and bo_va, it cannot use the amdgpu_bo_update
interface; use the amdgpu vm update interface directly.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dc4d6ae71476..7e306fd20de4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1592,15 +1592,15 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_vm_update_params *params,
  * Returns:
  * 0 for success, -EINVAL for failure.
  */
-static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-  struct amdgpu_device *bo_adev,
-  struct amdgpu_vm *vm, bool immediate,
-  bool unlocked, struct dma_resv *resv,
-  uint64_t start, uint64_t last,
-  uint64_t flags, uint64_t offset,
-  struct drm_mm_node *nodes,
-  dma_addr_t *pages_addr,
-  struct dma_fence **fence)
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+   struct amdgpu_device *bo_adev,
+   struct amdgpu_vm *vm, bool immediate,
+   bool unlocked, struct dma_resv *resv,
+   uint64_t start, uint64_t last,
+   uint64_t flags, uint64_t offset,
+   struct drm_mm_node *nodes,
+   dma_addr_t *pages_addr,
+   struct dma_fence **fence)
 {
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 976a12e5a8b9..848e175e99ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -366,6 +366,8 @@ struct amdgpu_vm_manager {
spinlock_t  pasid_lock;
 };
 
+struct amdgpu_bo_va_mapping;
+
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) 
((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) 
((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), 
(incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) 
((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), 
(incr), (flags)))
@@ -397,6 +399,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
   struct amdgpu_vm *vm);
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+   struct amdgpu_device *bo_adev,
+   struct amdgpu_vm *vm, bool immediate,
+   bool unlocked, struct dma_resv *resv,
+   uint64_t start, uint64_t last,
+   uint64_t flags, uint64_t offset,
+   struct drm_mm_node *nodes,
+   dma_addr_t *pages_addr,
+   struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 07/34] drm/amdkfd: deregister svm range

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

When an application explicitly calls unmap, or memory is unmapped from mmput
when the application exits, the driver will receive an MMU_NOTIFY_UNMAP event
to first remove the svm range from the process svms object tree and list, and
then unmap it from GPUs (in the following patch).

Split the svm ranges to handle partial unmapping of svm ranges. To
avoid deadlocks, updating MMU notifiers, range lists and interval trees
is done in a deferred worker. New child ranges are attached to their
parent range's child_list until the worker can update the
svm_range_list. svm_range_set_attr flushes deferred work and takes the
mmap_write_lock to guarantee that it has an up-to-date svm_range_list.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 285 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |  18 ++
 3 files changed, 305 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7c1d7789b91e..0d19a13fc227 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -735,6 +735,9 @@ struct svm_range_list {
struct mutexlock;
struct rb_root_cached   objects;
struct list_headlist;
+   struct work_struct  deferred_list_work;
+   struct list_headdeferred_range_list;
+   spinlock_t  deferred_list_lock;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c6a9766b97a6..ddb1e2a29881 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -136,6 +136,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>update_list);
INIT_LIST_HEAD(>remove_list);
INIT_LIST_HEAD(>insert_list);
+   INIT_LIST_HEAD(>deferred_list);
+   INIT_LIST_HEAD(>child_list);
mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
@@ -412,6 +414,17 @@ svm_range_split_head(struct svm_range *prange, struct 
svm_range *new,
return r;
 }
 
+void svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+struct svm_range *pchild, enum svm_work_list_ops op)
+{
+   pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+pchild, pchild->start, pchild->last, prange, op);
+
+   pchild->work_item.mm = mm;
+   pchild->work_item.op = op;
+   list_add_tail(>child_list, >child_list);
+}
+
 /*
  * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
  *
@@ -471,6 +484,30 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
return r;
 }
 
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with mmap write lock held, pending deferred work flushed
+ *
+ */
+static void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+  struct mm_struct *mm)
+{
+retry_flush_work:
+   flush_work(>deferred_list_work);
+   mmap_write_lock(mm);
+
+   if (list_empty(>deferred_range_list))
+   return;
+   mmap_write_unlock(mm);
+   pr_debug("retry flush\n");
+   goto retry_flush_work;
+}
+
 struct svm_range *svm_range_clone(struct svm_range *old)
 {
struct svm_range *new;
@@ -611,15 +648,255 @@ svm_range_handle_overlap(struct svm_range_list *svms, 
struct svm_range *new,
return r;
 }
 
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+   struct svm_range *prange)
+{
+   unsigned long start;
+   unsigned long last;
+
+   start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+   last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+   if (prange->start == start && prange->last == last)
+   return;
+
+   pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+ prange->svms, prange, start, last, prange->start,
+ prange->last);
+
+   if (start != 0 && last != 0) {
+   interval_tree_remove(>it_node, >svms->objects);
+   svm_range_remove_notifier(prange);
+   }
+   prange->it_node.start = prange->start;
+   prange->it_node.last = prange->last;
+
+   interval_tree_insert(>it_node, >svms->objects);
+   svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+{
+   struct mm_struct *mm = prange->work_item.mm;
+
+   switch (prange->work_item.op) {
+   case SVM_OP_NULL:
+  

[PATCH 06/34] drm/amdkfd: validate svm range system memory

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Use HMM to get system memory pages address, which will be used to
map to GPUs or migrate to vram.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 116 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  18 +
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index de62265adeaa..c6a9766b97a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -28,6 +28,15 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+   const struct mmu_notifier_range *range,
+   unsigned long cur_seq);
+
+static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
+   .invalidate = svm_range_cpu_invalidate_pagetables,
+};
+
 /**
  * svm_range_unlink - unlink svm_range from lists and interval tree
  * @prange: svm range structure to be removed
@@ -46,6 +55,18 @@ static void svm_range_unlink(struct svm_range *prange)
interval_tree_remove(>it_node, >svms->objects);
 }
 
+static void
+svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
+{
+   pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+prange, prange->start, prange->last);
+
+   mmu_interval_notifier_insert_locked(>notifier, mm,
+prange->start << PAGE_SHIFT,
+prange->npages << PAGE_SHIFT,
+_range_mn_ops);
+}
+
 /**
  * svm_range_add_to_svms - add svm range to svms
  * @prange: svm range structure to be added
@@ -65,11 +86,24 @@ static void svm_range_add_to_svms(struct svm_range *prange)
interval_tree_insert(>it_node, >svms->objects);
 }
 
+static void svm_range_remove_notifier(struct svm_range *prange)
+{
+   pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange,
+prange->notifier.interval_tree.start >> PAGE_SHIFT,
+prange->notifier.interval_tree.last >> PAGE_SHIFT);
+
+   if (prange->notifier.interval_tree.start != 0 &&
+   prange->notifier.interval_tree.last != 0)
+   mmu_interval_notifier_remove(>notifier);
+}
+
 static void svm_range_free(struct svm_range *prange)
 {
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   mutex_destroy(>lock);
kfree(prange);
 }
 
@@ -102,6 +136,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>update_list);
INIT_LIST_HEAD(>remove_list);
INIT_LIST_HEAD(>insert_list);
+   mutex_init(>lock);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
 >granularity, >flags);
@@ -377,6 +412,65 @@ svm_range_split_head(struct svm_range *prange, struct 
svm_range *new,
return r;
 }
 
+/*
+ * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
+ *
+ * To prevent concurrent destruction or change of range attributes, the
+ * svm_read_lock must be held. The caller must not hold the svm_write_lock
+ * because that would block concurrent evictions and lead to deadlocks. To
+ * serialize concurrent migrations or validations of the same range, the
+ * prange->migrate_mutex must be held.
+ *
+ * For VRAM ranges, the SVM BO must be allocated and valid (protected by its
+ * eviction fence).
+ *
+ * The following sequence ensures race-free validation and GPU mapping:
+ *
+ * 1. Reserve page table (and SVM BO if range is in VRAM)
+ * 2. hmm_range_fault to get page addresses (if system memory)
+ * 3. DMA-map pages (if system memory)
+ * 4-a. Take notifier lock
+ * 4-b. Check that pages still valid (mmu_interval_read_retry)
+ * 4-c. Check that the range was not split or otherwise invalidated
+ * 4-d. Update GPU page table
+ * 4-e. Release notifier lock
+ * 5. Release page table (and SVM BO) reservation
+ */
+static int svm_range_validate_and_map(struct mm_struct *mm,
+ struct svm_range *prange,
+ uint32_t gpuidx, bool intr, bool wait)
+{
+   struct hmm_range *hmm_range;
+   int r = 0;
+
+   if (!prange->actual_loc) {
+   r = amdgpu_hmm_range_get_pages(>notifier, mm, NULL,
+  prange->start << PAGE_SHIFT,
+  prange->npages, _range,
+  false, true);
+   if (r) {
+   pr_debug("failed %d to get svm range pages\n", r);
+   goto unreserve_out;
+ 

[PATCH 05/34] drm/amdgpu: add common HMM get pages function

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Move the HMM get pages function from amdgpu_ttm to amdgpu_mn. This
common function will be used by new svm APIs.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  | 83 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h  |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 76 +++---
 3 files changed, 100 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 828b5167ff12..997da4237a10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -155,3 +155,86 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mmu_interval_notifier_remove(>notifier);
bo->notifier.mm = NULL;
 }
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+  struct mm_struct *mm, struct page **pages,
+  uint64_t start, uint64_t npages,
+  struct hmm_range **phmm_range, bool readonly,
+  bool mmap_locked)
+{
+   struct hmm_range *hmm_range;
+   unsigned long timeout;
+   unsigned long i;
+   unsigned long *pfns;
+   int r = 0;
+
+   hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+   if (unlikely(!hmm_range))
+   return -ENOMEM;
+
+   pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_range;
+   }
+
+   hmm_range->notifier = notifier;
+   hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+   if (!readonly)
+   hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+   hmm_range->hmm_pfns = pfns;
+   hmm_range->start = start;
+   hmm_range->end = start + npages * PAGE_SIZE;
+   timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+retry:
+   hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+
+   if (likely(!mmap_locked))
+   mmap_read_lock(mm);
+
+   r = hmm_range_fault(hmm_range);
+
+   if (likely(!mmap_locked))
+   mmap_read_unlock(mm);
+   if (unlikely(r)) {
+   /*
+* FIXME: This timeout should encompass the retry from
+* mmu_interval_read_retry() as well.
+*/
+   if (r == -EBUSY && !time_after(jiffies, timeout))
+   goto retry;
+   goto out_free_pfns;
+   }
+
+   /*
+* Due to default_flags, all pages are HMM_PFN_VALID or
+* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+* the notifier_lock, and mmu_interval_read_retry() must be done first.
+*/
+   for (i = 0; pages && i < npages; i++)
+   pages[i] = hmm_pfn_to_page(pfns[i]);
+
+   *phmm_range = hmm_range;
+
+   return 0;
+
+out_free_pfns:
+   kvfree(pfns);
+out_free_range:
+   kfree(hmm_range);
+
+   return r;
+}
+
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+{
+   int r;
+
+   r = mmu_interval_read_retry(hmm_range->notifier,
+   hmm_range->notifier_seq);
+   kvfree(hmm_range->hmm_pfns);
+   kfree(hmm_range);
+
+   return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index a292238f75eb..7f7d37a457c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,6 +30,13 @@
 #include 
 #include 
 
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+  struct mm_struct *mm, struct page **pages,
+  uint64_t start, uint64_t npages,
+  struct hmm_range **phmm_range, bool readonly,
+  bool mmap_locked);
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+
 #if defined(CONFIG_HMM_MIRROR)
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 41a4c456961c..a2585058e65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -32,7 +32,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -670,10 +669,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
-   struct hmm_range *range;
-   unsigned long timeout;
struct mm_struct *mm;
-   unsigned long i;
+   bool readonly;
int r = 0;
 
mm = bo->notifier.mm;
@@ -689,76 +686,26 @@ int 

[PATCH 04/34] drm/amdkfd: add svm ioctl GET_ATTR op

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Get the intersection of attributes over all memory in the given
range

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 164 +++
 1 file changed, 164 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 21e6a7959bc7..de62265adeaa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -707,6 +707,167 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, 
uint64_t size,
return r;
 }
 
+static int
+svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+  uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+   DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+   DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+   bool get_preferred_loc = false;
+   bool get_prefetch_loc = false;
+   bool get_granularity = false;
+   bool get_accessible = false;
+   bool get_flags = false;
+   uint64_t last = start + size - 1UL;
+   struct mm_struct *mm = current->mm;
+   uint8_t granularity = 0xff;
+   struct interval_tree_node *node;
+   struct svm_range_list *svms;
+   struct svm_range *prange;
+   uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   uint32_t flags = 0x;
+   int gpuidx;
+   uint32_t i;
+
+   pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", >svms, start,
+start + size - 1, nattr);
+
+   mmap_read_lock(mm);
+   if (!svm_range_is_valid(mm, start, size)) {
+   pr_debug("invalid range\n");
+   mmap_read_unlock(mm);
+   return -EINVAL;
+   }
+   mmap_read_unlock(mm);
+
+   for (i = 0; i < nattr; i++) {
+   switch (attrs[i].type) {
+   case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+   get_preferred_loc = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+   get_prefetch_loc = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_ACCESS:
+   get_accessible = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+   get_flags = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+   get_granularity = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+   case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+   case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+   fallthrough;
+   default:
+   pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
+   return -EINVAL;
+   }
+   }
+
+   svms = >svms;
+
+   mutex_lock(>lock);
+
+   node = interval_tree_iter_first(>objects, start, last);
+   if (!node) {
+   pr_debug("range attrs not found return default values\n");
+   svm_range_set_default_attributes(, _loc,
+, );
+   /* TODO: Automatically create SVM ranges and map them on
+* GPU page faults
+   if (p->xnack_enabled)
+   bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+*/
+
+   goto fill_values;
+   }
+   bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+   bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE);
+
+   while (node) {
+   struct interval_tree_node *next;
+
+   prange = container_of(node, struct svm_range, it_node);
+   next = interval_tree_iter_next(node, start, last);
+
+   if (get_preferred_loc) {
+   if (prange->preferred_loc ==
+   KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+   (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+location != prange->preferred_loc)) {
+   location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   get_preferred_loc = false;
+   } else {
+   location = prange->preferred_loc;
+   }
+   }
+   if (get_prefetch_loc) {
+   if (prange->prefetch_loc ==
+   KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+   (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+prefetch_loc != prange->prefetch_loc)) {
+   prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   get_prefetch_loc = false;
+   } else {

[PATCH 03/34] drm/amdkfd: register svm range

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

svm range structure stores the range start address, size, attributes,
flags, prefetch location and gpu bitmap which indicates which GPU this
range maps to. Same virtual address is shared by CPU and GPUs.

Process has svm range list which uses both interval tree and list to
store all svm ranges registered by the process. Interval tree is used by
GPU vm fault handler and CPU page fault handler to get svm range
structure from the specific address. List is used to scan all ranges in
eviction restore work.

No overlap range interval [start, last] exist in svms object interval
tree. If process registers new range which has overlap with old range,
the old range split into 2 ranges depending on the overlap happens at
head or tail part of old range.

Apply attributes preferred location, prefetch location, mapping flags,
migration granularity to svm range, store mapping gpu index into bitmap.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  17 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   8 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   9 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 729 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  85 +++
 6 files changed, 850 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index e1e4115dcf78..387ce0217d35 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -54,7 +54,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgdev.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
-   $(AMDKFD_PATH)/kfd_crat.o
+   $(AMDKFD_PATH)/kfd_crat.o \
+   $(AMDKFD_PATH)/kfd_svm.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dbc824cc6b32..9511826ac8ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -38,6 +38,7 @@
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
+#include "kfd_svm.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 
@@ -1744,11 +1745,27 @@ static int kfd_ioctl_smi_events(struct file *filep,
 
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
+   struct kfd_ioctl_svm_args *args = data;
int r = 0;
 
if (p->svm_disabled)
return -EPERM;
 
+   pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
+args->start_addr, args->size, args->op, args->nattr);
+
+   if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
+   return -EINVAL;
+   if (!args->start_addr || !args->size)
+   return -EINVAL;
+
+   mutex_lock(>mutex);
+
+   r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
+ args->attrs);
+
+   mutex_unlock(>mutex);
+
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d701b53b9bc3..7c1d7789b91e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -731,6 +731,12 @@ struct kfd_process_device {
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
 
+struct svm_range_list {
+   struct mutexlock;
+   struct rb_root_cached   objects;
+   struct list_headlist;
+};
+
 /* Process data */
 struct kfd_process {
/*
@@ -810,6 +816,8 @@ struct kfd_process {
struct kobject *kobj_queues;
struct attribute attr_pasid;
 
+   /* shared virtual memory registered by this process */
+   struct svm_range_list svms;
bool svm_disabled;
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1a6236317cd5..1f3d4f5c64a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,6 +35,7 @@
 #include 
 #include "amdgpu_amdkfd.h"
 #include "amdgpu.h"
+#include "kfd_svm.h"
 
 struct mm_struct;
 
@@ -42,6 +43,7 @@ struct mm_struct;
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
 #include "kfd_iommu.h"
+#include "kfd_svm.h"
 
 /*
  * List of struct kfd_process (field kfd_process).
@@ -1003,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct 
*work)
kfd_iommu_unbind_process(p);
 
kfd_process_free_outstanding_kfd_bos(p);
+   svm_range_list_fini(p);
 
kfd_process_destroy_pdds(p);

[PATCH 02/34] drm/amdkfd: add svm ioctl API

2021-03-31 Thread Felix Kuehling
From: Philip Yang 

Add svm (shared virtual memory) ioctl data structure and API definition.

The svm ioctl API is designed to be extensible in the future. All
operations are provided by a single IOCTL to preserve ioctl number
space. The arguments structure ends with a variable size array of
attributes that can be used to set or get one or multiple attributes.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  12 ++
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c |   4 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 +
 include/uapi/linux/kfd_ioctl.h   | 130 ++-
 5 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 43de260b2230..dbc824cc6b32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1742,6 +1742,16 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(dev, >anon_fd);
 }
 
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+   int r = 0;
+
+   if (p->svm_disabled)
+   return -EPERM;
+
+   return r;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1840,6 +1850,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
kfd_ioctl_smi_events, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index a2c9063076cc..52da1a3b2c7a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_POLARIS12:
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
+   /* VI GPUs cannot support SVM with only
+* 40 bits of virtual address space.
+*/
+   process->svm_disabled |= true;
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 17d91f05afe3..d701b53b9bc3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -809,6 +809,8 @@ struct kfd_process {
struct kobject *kobj;
struct kobject *kobj_queues;
struct attribute attr_pasid;
+
+   bool svm_disabled;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d2cd757670d2..1a6236317cd5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1208,6 +1208,7 @@ static struct kfd_process *create_process(const struct 
task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+   process->svm_disabled = false;
INIT_DELAYED_WORK(>eviction_work, evict_process_worker);
INIT_DELAYED_WORK(>restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index bf5e7d7846dd..247b57baa94f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -30,9 +30,10 @@
  * - 1.1 - initial version
  * - 1.3 - Add SMI events support
  * - 1.4 - Indicate new SRAM EDC bit in device properties
+ * - 1.5 - Add SVM API
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 4
+#define KFD_IOCTL_MINOR_VERSION 5
 
 struct kfd_ioctl_get_version_args {
__u32 major_version;/* from KFD */
@@ -473,6 +474,129 @@ enum kfd_mmio_remap {
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
 };
 
+/* Guarantee host access to memory */
+#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x0001
+/* Fine grained coherency between all devices with access */
+#define KFD_IOCTL_SVM_FLAG_COHERENT0x0002
+/* Use any GPU in same hive as preferred device */
+#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL  0x0004
+/* GPUs only read, allows replication */
+#define KFD_IOCTL_SVM_FLAG_GPU_RO  0x0008
+/* Allow execution on GPU */
+#define KFD_IOCTL_SVM_FLAG_GPU_EXEC0x0010
+/* GPUs mostly read, may allow similar optimizations 

[PATCH 01/34] drm/amdkfd: helper to convert gpu id and idx

2021-03-31 Thread Felix Kuehling
From: Alex Sierra 

svm range uses gpu bitmap to store which GPU svm range maps to.
Application pass driver gpu id to specify GPU, the helper is needed to
convert gpu id to gpu bitmap idx.

Access through kfd_process_device pointers array from kfd_process.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 11 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ++
 2 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0b6595f7acda..17d91f05afe3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -842,6 +842,17 @@ struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
+
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
+   uint32_t gpuidx, uint32_t *gpuid) {
+   return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
+}
+static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
+   struct kfd_process *p, uint32_t gpuidx) {
+   return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
+}
+
 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
 int kfd_process_restore_queues(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d4241d29ea94..d2cd757670d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1607,6 +1607,16 @@ int kfd_process_restore_queues(struct kfd_process *p)
return ret;
 }
 
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
+{
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++)
+   if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+   return i;
+   return -EINVAL;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
int ret;
-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 00/34] Add HMM-based SVM memory manager to KFD v3

2021-03-31 Thread Felix Kuehling
Since the last patch series on March 22, I integrated all fixes into
the original patch series. An additional fix was added for handling
failed migrations during GPU page faults. (A bigger rework of
migrations and VRAM mappings will come in the future.) Support for
per-process XNACK mode selection was added for Aldebaran. The
initialization of svm_migrate was moved to happen before
kfd_topology_add_device in order to fix reporting of the
SVMAPI_SUPPORTED capability. An updated kfdtest now checks this
capability before running any SVM tests.

Support for SVM can now be controlled by a Kconfig option added in
patch 34.

This series and the corresponding ROCm Thunk and KFDTest changes are also
available on gitub:
  https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip
  https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip

Alex Sierra (9):
  drm/amdkfd: helper to convert gpu id and idx
  drm/amdkfd: add xnack enabled flag to kfd_process
  drm/amdkfd: add ioctl to configure and query xnack retries
  drm/amdgpu: enable 48-bit IH timestamp counter
  drm/amdkfd: SVM API call to restore page tables
  drm/amdkfd: add svm_bo reference for eviction fence
  drm/amdgpu: add param bit flag to create SVM BOs
  drm/amdgpu: svm bo enable_signal call condition
  drm/amdgpu: add svm_bo eviction to enable_signal cb

Felix Kuehling (13):
  drm/amdkfd: map svm range to GPUs
  drm/amdkfd: svm range eviction and restore
  drm/amdgpu: Enable retry faults unconditionally on Aldebaran
  drm/amdkfd: validate vram svm range from TTM
  drm/amdkfd: HMM migrate ram to vram
  drm/amdkfd: HMM migrate vram to ram
  drm/amdkfd: invalidate tables on page retry fault
  drm/amdkfd: page table restore through svm API
  drm/amdkfd: add svm_bo eviction mechanism support
  drm/amdkfd: refine migration policy with xnack on
  drm/amdkfd: add svm range validate timestamp
  drm/amdkfd: multiple gpu migrate vram to vram
  drm/amdkfd: Add CONFIG_HSA_AMD_SVM

Philip Yang (12):
  drm/amdkfd: add svm ioctl API
  drm/amdkfd: register svm range
  drm/amdkfd: add svm ioctl GET_ATTR op
  drm/amdgpu: add common HMM get pages function
  drm/amdkfd: validate svm range system memory
  drm/amdkfd: deregister svm range
  drm/amdgpu: export vm update mapping interface
  drm/amdkfd: register HMM device private zone
  drm/amdkfd: support xgmi same hive mapping
  drm/amdkfd: copy memory through gart table
  drm/amdgpu: reserve fence slot to update page table
  drm/amdkfd: Add SVM API support capability bits

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|4 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  |   16 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   13 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|   83 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|7 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   90 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   48 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   11 +
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c  |8 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c   |6 +-
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c|1 +
 drivers/gpu/drm/amd/amdkfd/Kconfig|   14 +
 drivers/gpu/drm/amd/amdkfd/Makefile   |5 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   64 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |4 +
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  |   13 +-
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  |4 +
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  |  922 ++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h  |   64 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   36 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   82 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 2866 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |  205 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |6 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   10 +-
 include/uapi/linux/kfd_ioctl.h|  171 +-
 28 files changed, 4652 insertions(+), 108 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h

-- 
2.31.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: add DMUB outbox event IRQ source define/complete/debug flag

2021-03-31 Thread Jude Shih
[Why & How]
We use the outbox interrupt that allows us to do the AUX via DMUB.
Therefore, we need to add some IRQ-source-related definitions
in the header files.
Also, add a debug flag that allows us to turn it on/off
for testing purposes.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 2 +-
 drivers/gpu/drm/amd/include/amd_shared.h  | 3 ++-
 drivers/gpu/drm/amd/include/ivsrcid/dcn/irqsrcs_dcn_1_0.h | 2 ++
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 963ecfd84347..479c8a28a3a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -923,6 +923,7 @@ struct amdgpu_device {
struct amdgpu_irq_src   pageflip_irq;
struct amdgpu_irq_src   hpd_irq;
struct amdgpu_irq_src   dmub_trace_irq;
+   struct amdgpu_irq_src   outbox_irq;
 
/* rings */
u64 fence_context;
@@ -1077,6 +1078,7 @@ struct amdgpu_device {
 
boolin_pci_err_recovery;
struct pci_saved_state  *pci_state;
+   struct completion dmub_aux_transfer_done;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6a06234dbcad..0b88e13f5a7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -159,7 +159,7 @@ int amdgpu_smu_pptable_id = -1;
  * PSR (bit 3) disabled by default
  */
 uint amdgpu_dc_feature_mask = 2;
-uint amdgpu_dc_debug_mask;
+uint amdgpu_dc_debug_mask = 0x10;
 int amdgpu_async_gfx_ring = 1;
 int amdgpu_mcbp;
 int amdgpu_discovery = -1;
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
b/drivers/gpu/drm/amd/include/amd_shared.h
index 43ed6291b2b8..097672cc78a1 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -227,7 +227,8 @@ enum DC_DEBUG_MASK {
DC_DISABLE_PIPE_SPLIT = 0x1,
DC_DISABLE_STUTTER = 0x2,
DC_DISABLE_DSC = 0x4,
-   DC_DISABLE_CLOCK_GATING = 0x8
+   DC_DISABLE_CLOCK_GATING = 0x8,
+   DC_ENABLE_DMUB_AUX = 0x10,
 };
 
 enum amd_dpm_forced_level;
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/dcn/irqsrcs_dcn_1_0.h 
b/drivers/gpu/drm/amd/include/ivsrcid/dcn/irqsrcs_dcn_1_0.h
index e2bffcae273a..754170a86ea4 100644
--- a/drivers/gpu/drm/amd/include/ivsrcid/dcn/irqsrcs_dcn_1_0.h
+++ b/drivers/gpu/drm/amd/include/ivsrcid/dcn/irqsrcs_dcn_1_0.h
@@ -1132,5 +1132,7 @@
 
 #define DCN_1_0__SRCID__DMCUB_OUTBOX_HIGH_PRIORITY_READY_INT   0x68
 #define DCN_1_0__CTXID__DMCUB_OUTBOX_HIGH_PRIORITY_READY_INT   6
+#define DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT0x68 // 
DMCUB_IHC_outbox1_ready_int IHC_DMCUB_outbox1_ready_int_ack 
DMCUB_OUTBOX_LOW_PRIORITY_READY_INTERRUPT DISP_INTERRUPT_STATUS_CONTINUE24 
Level/Pulse
+#define DCN_1_0__CTXID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT8
 
 #endif // __IRQSRCS_DCN_1_0_H__
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[pull] amdgpu, amdkfd drm-fixes-5.12

2021-03-31 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.12.

The following changes since commit 09d78dde88ef95a27b54a6e450ee700ccabdf39d:

  Merge tag 'drm-msm-fixes-2021-02-25' of 
https://gitlab.freedesktop.org/drm/msm into drm-fixes (2021-03-26 13:04:17 
+1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.12-2021-03-31

for you to fetch changes up to e3512fb67093fabdf27af303066627b921ee9bd8:

  drm/amdgpu: check alignment on CPU page for bo map (2021-03-31 21:53:38 -0400)


amd-drm-fixes-5.12-2021-03-31:

amdgpu:
- Polaris idle power fix
- VM fix
- Vangogh S3 fix
- Fixes for non-4K page sizes

amdkfd:
- dqm fence memory corruption fix


Alex Deucher (1):
  drm/amdgpu/vangogh: don't check for dpm in is_dpm_running when in suspend

Evan Quan (1):
  drm/amd/pm: no need to force MCLK to highest when no display connected

Huacai Chen (1):
  drm/amdgpu: Set a suitable dev_info.gart_page_size

Nirmoy Das (1):
  drm/amdgpu: fix offset calculation in amdgpu_vm_bo_clear_mappings()

Qu Huang (1):
  drm/amdkfd: dqm fence memory corruption

Xi Ruoyao (1):
  drm/amdgpu: check alignment on CPU page for bo map

 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  8 
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c   |  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  |  5 +
 11 files changed, 26 insertions(+), 20 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [pull] amdgpu, amdkfd, radeon drm-next-5.12

2021-03-31 Thread Dave Airlie
I think this is due to this pull, on arm32.

/home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_srv.c:
In function ‘dmub_srv_hw_init’:
/home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_srv.c:519:44:
warning: cast from pointer to integer of different size
[-Wpointer-to-int-cast]
  outbox0_rb_params.base_address = (void
*)((uint64_t)(tracebuff_fb->cpu_addr) + TRACE_BUFFER_ENTRY_OFFSET);
^
/home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_srv.c:519:35:
warning: cast to pointer from integer of different size
[-Wint-to-pointer-cast]
  outbox0_rb_params.base_address = (void
*)((uint64_t)(tracebuff_fb->cpu_addr) + TRACE_BUFFER_ENTRY_OFFSET);

Dave.

On Sat, 27 Mar 2021 at 05:16, Zhuo, Qingqing  wrote:
>
> [AMD Public Use]
>
> On Thu, Feb 18, 2021 at 11:15 PM Alex Deucher  wrote:
> >>
> >> Hi Dave, Daniel,
> >>
> >> Fixes for 5.12.
> >>
> >> The following changes since commit 
> >> 4c3a3292730c56591472717d8c5c0faf74f6c6bb:
> >>
> >>   drm/amd/display: fix unused variable warning (2021-02-05 09:49:44
> >> +1000)
> >>
> >> are available in the Git repository at:
> >>
> >>
> >> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitl
> >> ab.freedesktop.org%2Fagd5f%2Flinux.gitdata=04%7C01%7Cqingqing.zhu
> >> o%40amd.com%7Cce0d1ee6a18b4a95366008d8f082048e%7C3dd8961fe4884e608e11a
> >> 82d994e183d%7C0%7C0%7C637523789263486288%7CUnknown%7CTWFpbGZsb3d8eyJWI
> >> joiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000
> >> mp;sdata=Ig3OkPN0X8OtCOHDJqV%2FZSEOsL7gPs8OMh9sXDniR2w%3Dreserved
> >> =0 tags/amd-drm-next-5.12-2021-02-18
> >>
> >> for you to fetch changes up to 6e80fb8ab04f6c4f377e2fd422bdd1855beb7371:
> >>
> >>   drm/amdgpu: Set reference clock to 100Mhz on Renoir (v2) (2021-02-18
> >> 16:43:09 -0500)
>
> > Pulled into drm-next, with some conflicts, please double-check.
>
> > I also spotted
>
> > commit ea3b4242bc9ca197762119382b37e125815bd67f
> > Author: Qingqing Zhuo 
> > Date:   Tue Feb 9 16:36:41 2021 -0500
>
> >   drm/amd/display: Fix system hang after multiple hotplugs (v3)
>
> > I think it would be good if that could use the drm_vblank_work stuff from 
> > Lyude instead of hand-rolling your own.
> > -Daniel
>
> Hi Daniel,
>
> Thank you for the suggestion! I need to look into further and will do so as 
> soon as I have bandwidth.
>
> Thanks,
> Lillian
>
> >>
> >> 
> >> amd-drm-next-5.12-2021-02-18:
> >>
> >> amdgpu:
> >> - Prefer Bhawan's unused variable fix
> >> - Fixes for high priority queues on gfx8,9
> >> - swSMU fixes for sienna cichlid
> >> - swSMU fixes for renoir
> >> - mmhub client id fixes for arcturus
> >> - SMUIO fixes for navi family
> >> - swSMU fixes for vangogh
> >> - GPU reset cleanup
> >> - Display fixes
> >> - GFX harvesting fix for sienna cichlid
> >> - Fix reference clock on Renoir
> >> - Misc fixes and cleanups
> >>
> >> amdkfd:
> >> - Fix for unique id query
> >> - Fix recursive lock warnings
> >>
> >> radeon:
> >> - Remove confusing VCE messages on Oland
> >>
> >> 
> >> Alex Deucher (16):
> >>   Revert "drm/amd/display: fix unused variable warning"
> >>   drm/amdgpu/smu12: fix power reporting on renoir
> >>   drm/amdgpu/gmc9: fix mmhub client mapping for arcturus
> >>   drm/amdgpu/si: minor clean up of reset code
> >>   drm/amdgpu/cik: minor clean up of reset code
> >>   drm/amdgpu/vi: minor clean up of reset code
> >>   drm/amdgpu: add generic pci reset as an option
> >>   drm/amdgpu/si: add PCI reset support
> >>   drm/amdgpu/soc15: add PCI reset support
> >>   drm/amdgpu/nv: add PCI reset support
> >>   drm/amdgpu: drop extra drm_kms_helper_poll_enable/disable calls
> >>   drm/amdgpu: use runpm flag rather than fbcon for kfd runtime suspend 
> >> (v2)
> >>   drm/amdgpu: reset runpm flag if device suspend fails
> >>   Revert "drm/amd/display: Update NV1x SR latency values"
> >>   drm/radeon: OLAND boards don't have VCE
> >>   drm/amdgpu: Set reference clock to 100Mhz on Renoir (v2)
> >>
> >> Anthony Koo (1):
> >>   drm/amd/display: [FW Promotion] Release 0.0.51
> >>
> >> Aric Cyr (1):
> >>   drm/amd/display: 3.2.122
> >>
> >> Bhawanpreet Lakha (1):
> >>   drm/amd/display: Fix unused variable warning
> >>
> >> Dale Zhao (1):
> >>   drm/amd/display: fix type mismatch error for return variable
> >>
> >> Derek Lai (1):
> >>   drm/amd/display: Add DIG_CLOCK_PATTERN in the transmitter
> >> control
> >>
> >> Eric Yang (1):
> >>   drm/amd/display: move edp sink present detection to hw init
> >>
> >> Fangzhi Zuo (1):
> >>   drm/amd/display: Add return code instead of boolean for future
> >> use
> >>
> >> Felix Kuehling (1):
> >>   drm/amdkfd: Fix recursive lock warnings
> >>
> >> Gustavo A. 

Re: [PATCH 2/4] drm/amd/display: Add FPU event trace

2021-03-31 Thread kernel test robot
Hi Rodrigo,

I love your patch! Yet something to improve:

[auto build test ERROR on next-20210331]
[cannot apply to linus/master v5.12-rc5 v5.12-rc4 v5.12-rc3 v5.12-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
base:7a43c78d0573e00456b033e2b9a895b89464
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/5859110d0579f7ee57ca1b1840c3960492a9c0c0
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
git checkout 5859110d0579f7ee57ca1b1840c3960492a9c0c0
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:29:10: fatal error: 
>> asm/fpu/api.h: No such file or directory
  29 | #include 
 |  ^~~
   compilation terminated.


vim +29 drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c

28  
  > 29  #include 
30  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/4] drm/amd/display: Add FPU event trace

2021-03-31 Thread kernel test robot
Hi Rodrigo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20210331]
[cannot apply to linus/master v5.12-rc5 v5.12-rc4 v5.12-rc3 v5.12-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
base:7a43c78d0573e00456b033e2b9a895b89464
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# 
https://github.com/0day-ci/linux/commit/5859110d0579f7ee57ca1b1840c3960492a9c0c0
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
git checkout 5859110d0579f7ee57ca1b1840c3960492a9c0c0
# save the attached .config to linux build tree
make W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:41:6: warning: no 
>> previous prototype for 'dc_fpu_begin' [-Wmissing-prototypes]
  41 | void dc_fpu_begin(const char *function_name, const int line)
 |  ^~~~
>> drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c:57:6: warning: no 
>> previous prototype for 'dc_fpu_end' [-Wmissing-prototypes]
  57 | void dc_fpu_end(const char *function_name, const int line)
 |  ^~


vim +/dc_fpu_begin +41 drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/dc_fpu.c

30  
31  /**
32   * dc_fpu_begin - Enables FPU protection
33   * @function_name: A string containing the function name for debug 
purposes
34   * @line: A-line number where DC_FP_START was invoked for debug purpose
35   *
36   * This function is responsible for managing the use of 
kernel_fpu_begin() with
37   * the advantage of providing an event trace for debugging.
38   *
39   * Note: Do not call this function directly; always use DC_FP_START().
40   */
  > 41  void dc_fpu_begin(const char *function_name, const int line)
42  {
43  TRACE_DCN_FPU(true, function_name, line);
44  kernel_fpu_begin();
45  }
46  
47  /**
48   * dc_fpu_end - Disable FPU protection
49   * @function_name: A string containing the function name for debug 
purposes
50   * @line: A-line number where DC_FP_END was invoked for debug purpose
51   *
52   * This function is responsible for managing the use of 
kernel_fpu_end() with
53   * the advantage of providing an event trace for debugging.
54   *
55   * Note: Do not call this function directly; always use DC_FP_END().
56   */
  > 57  void dc_fpu_end(const char *function_name, const int line)

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Alex Deucher
Does disabling pageflipping via the xorg.conf help?

Alex

On Wed, Mar 31, 2021 at 1:40 PM Christian König
 wrote:
>
> Yeah, agree that must be some kind of bug in the upper layer of the stack.
>
> Most likely some userspace component is not handling the specialties of 
> interlacing correctly (different vblank timing every other frame).
>
> It probably only works on Intel and after restarting the display manager by 
> coincidence.
>
> Sorry, but as I said this is a use case which basically nobody is using any 
> more and because of this the different parts of the stack are not tested well 
> enough for this.
>
> Christian.
>
> Am 31.03.21 um 17:47 schrieb Alberto Salvia Novella:
>
> Restarting the display manager service works and, more importantly, makes the 
> bug no longer reproducible.
>
> Restarting the window manager doesn't work.
>
> Changing display manager makes the bug still reproducible.
>
> Maybe this is due to xorg-server, isn't it?
>
> On Wed, 31 Mar 2021 at 16:55, Christian König 
>  wrote:
>>
>> Well the hardware is working fine as far as I can see.
>>
>> Can you try to kill the X server over SSH and see if you then get some 
>> screen update?
>>
>> Regards,
>> Christian.
>>
>> Am 31.03.21 um 16:52 schrieb Alberto Salvia Novella:
>>
>> Output.
>>
>> On Wed, 31 Mar 2021 at 16:36, Christian König  
>> wrote:
>>>
>>> Mhm strange.
>>>
>>> Can you get me the output of "sudo cat 
>>> /sys/kernel/debug/dri/0/radeon_fence_info" when the problem happens?
>>>
>>> Thanks,
>>> Christian.
>>>
>>> Am 31.03.21 um 16:33 schrieb Alberto Salvia Novella:
>>>
>>> - The computer still replies to ping.
>>> - The journal shows no errors, but a few warnings.
>>> - The mouse doesn't freeze.
>>>
>>> On Wed, 31 Mar 2021 at 10:09, Christian König  
>>> wrote:

 Can you access the system over the network and see if there is anything in 
 the system log?

 It sounds like the display stack has crashed, but when the sound keeps 
 playing the system is most likely still responsive over network.

 Thanks,
 Christian.

 Am 31.03.21 um 10:05 schrieb Alberto Salvia Novella:

 What happens is this simple: after a few minutes, about 6 or so, the 
 entire content of the screen stays still. In some minor situations only 
 the applications panel of KDE Plasma.

 If music is playing it continues playing, so only graphics are hung. Yet 
 in most cases the power button won't shut down the computer, as it usually 
 does.

 At least this is the case using kwin on x11, and not on wayland. It only 
 happens on "radeon" and not on Intel or "radeonhd".

 On Wed, 31 Mar 2021 at 09:48, Christian König  
 wrote:
>
> Correct, but a TV is intended for videos only. That's why it implements 
> only the lower HDMI standard.
>
> Interlaced transmits only half the lines with each frame, so a 60Hz mode 
> effectively either becomes a 30Hz mode, halving the vertical resolution, 
> or uses adaptive motion compensation, with the known visual artifacts, 
> depending on what the deinterlacing setting on your TV is.
>
> You could just add a progressive 1920x540@60 or 1920x1080@30 mode 
> manually and would have the same effect with probably better quality. See 
> https://de.wikipedia.org/wiki/Deinterlacing for reference.
>
> If you can give us some more information what is happening when the 
> system freeze we could try to narrow this down, but we can't spend much 
> time on a very specific use case in a driver which is in maintenance mode.
>
> Regards,
> Christian.
>
> Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:
>
> 24fps is intended for video only. Anything interactive at 24fps, as just 
> moving the mouse around, is extremely choppy.
>
> No way anyone would prefer that over an interlaced resolution or a lower 
> resolution. That is, by far, the worst option.
>
> Just try it on your screen, set it to 24Hz or alike, and tell me your 
> experience. You can't even tell where the mouse is going to go.
>
> On Wed, 31 Mar 2021 at 08:44, Christian König  
> wrote:
>>
>> Hi Alberto,
>>
>> well a frame rate of 24Hz is perfectly reasonable for a TV and desktop 
>> usage.
>>
>> This is probably caused by the TVs limited HDMI bandwidth and a refresh 
>> rate of 30/25 Hz for the interlaced mode isn't much better either.
>>
>> Regards,
>> Christian.
>>
>> Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:
>>
>> The frame-rate at 24Hz is extremely poor for normal desktop usage.
>>
>> If the highest resolution, aka 1080p, uses that refresh rate then the 
>> desktop will default to that frame-rate.
>>
>> Other progressive modes don't exhibit any issue.
>>
>> On Tue, 30 Mar 2021 at 18:26, Christian König  
>> wrote:
>>>
>>> Hi Alberto,

Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Christian König

Yeah, agree that must be some kind of bug in the upper layer of the stack.

Most likely some userspace component is not handling the specialties of 
interlacing correctly (different vblank timing every other frame).


It probably only works on Intel and after restarting the display manager 
by coincidence.


Sorry, but as I said this is a use case which basically nobody is using 
any more and because of this the different parts of the stack are not 
tested well enough for this.


Christian.

Am 31.03.21 um 17:47 schrieb Alberto Salvia Novella:
Restarting the *display manager* service works and, more importantly, 
makes the bug no longer reproducible.


Restarting the *window manager* doesn't work.

*Changing* display manager makes the bug still reproducible.

Maybe this is due to xorg-server, isn't it?

On Wed, 31 Mar 2021 at 16:55, Christian König 
> wrote:


Well the hardware is working fine as far as I can see.

Can you try to kill the X server over SSH and see if you then get
some screen update?

Regards,
Christian.

Am 31.03.21 um 16:52 schrieb Alberto Salvia Novella:

Output

.

On Wed, 31 Mar 2021 at 16:36, Christian König
mailto:christian.koe...@amd.com>> wrote:

Mhm strange.

Can you get me the output of "sudo cat
/sys/kernel/debug/dri/0/radeon_fence_info" when the problem
happens?

Thanks,
Christian.

Am 31.03.21 um 16:33 schrieb Alberto Salvia Novella:

- The computer still replies to *ping*.
- The *journal* shows no errors, but a few warnings

.
- The *mouse* doesn't freeze.

On Wed, 31 Mar 2021 at 10:09, Christian König
mailto:christian.koe...@amd.com>>
wrote:

Can you access the system over the network and see if
there is anything in the system log?

It sounds like the display stack has crashed, but when
the sound keeps playing the system is most likely still
responsive over network.

Thanks,
Christian.

Am 31.03.21 um 10:05 schrieb Alberto Salvia Novella:

What happens is this simple: after a few minutes, about
6 or so, the entire content of the screen stays still.
In some minor situations only the applications panel of
KDE Plasma.

If music is playing it continues playing, so only
graphics are hung. Yet in most cases the power button
won't shut down the computer, as it usually does.

At least this is the case using kwin on x11, and not on
wayland. It only happens on "radeon" and not on Intel
or "radeonhd".

On Wed, 31 Mar 2021 at 09:48, Christian König
mailto:christian.koe...@amd.com>> wrote:

Correct, but a TV is intended for videos only.
That's why it implements only the lower HDMI standard.

Interlaced transmits only halve the lines with each
frame, so a 60Hz mode effectively either becomes a
30Hz mode, halving the vertical resolution or
adaptive motion compensated which the know visual
artifacts. Depending on what the deinterlacing
setting on your TV is.

You could just add a progressive 1920x540@60 or
1920x1080@30 mode manually and would have the same
effect with probably better quality. See
https://de.wikipedia.org/wiki/Deinterlacing


for reference.

If you can give us 

Re: [PATCH 1/4] drm/amd/display: Introduce FPU directory inside DC

2021-03-31 Thread kernel test robot
Hi Rodrigo,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20210331]
[also build test WARNING on v5.12-rc5]
[cannot apply to linus/master v5.12-rc5 v5.12-rc4 v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
base:7a43c78d0573e00456b033e2b9a895b89464
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/c4d5d1d0a04f13014a22e6932ddf8487bb130d34
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Rodrigo-Siqueira/drm-amd-display-Base-changes-for-isolating-FPU-operation-in-a-single-place/20210331-202750
git checkout c4d5d1d0a04f13014a22e6932ddf8487bb130d34
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/../display/dc/fpu_operations/dcn2x.c:84:6: 
>> warning: no previous prototype for 
>> 'dcn20_populate_dml_writeback_from_context' [-Wmissing-prototypes]
  84 | void dcn20_populate_dml_writeback_from_context(struct dc *dc,
 |  ^


vim +/dcn20_populate_dml_writeback_from_context +84 
drivers/gpu/drm/amd/amdgpu/../display/dc/fpu_operations/dcn2x.c

83  
  > 84  void dcn20_populate_dml_writeback_from_context(struct dc *dc,
85  struct resource_context *res_ctx, display_e2e_pipe_params_st 
*pipes)
86  {
87  _dcn20_populate_dml_writeback_from_context(dc, res_ctx, pipes);
88  }
89  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Christian König

Well the hardware is working fine as far as I can see.

Can you try to kill the X server over SSH and see if you then get some 
screen update?


Regards,
Christian.

Am 31.03.21 um 16:52 schrieb Alberto Salvia Novella:
Output 
.


On Wed, 31 Mar 2021 at 16:36, Christian König 
mailto:christian.koe...@amd.com>> wrote:


Mhm strange.

Can you get me the output of "sudo cat
/sys/kernel/debug/dri/0/radeon_fence_info" when the problem happens?

Thanks,
Christian.

Am 31.03.21 um 16:33 schrieb Alberto Salvia Novella:

- The computer still replies to *ping*.
- The *journal* shows no errors, but a few warnings

.
- The *mouse* doesn't freeze.

On Wed, 31 Mar 2021 at 10:09, Christian König
mailto:christian.koe...@amd.com>> wrote:

Can you access the system over the network and see if there
is anything in the system log?

It sounds like the display stack has crashed, but when the
sound keeps playing the system is most likely still
responsive over network.

Thanks,
Christian.

Am 31.03.21 um 10:05 schrieb Alberto Salvia Novella:

What happens is this simple: after a few minutes, about 6 or
so, the entire content of the screen stays still. In some
minor situations only the applications panel of KDE Plasma.

If music is playing it continues playing, so only graphics
are hung. Yet in most cases the power button won't shut down
the computer, as it usually does.

At least this is the case using kwin on x11, and not on
wayland. It only happens on "radeon" and not on Intel or
"radeonhd".

On Wed, 31 Mar 2021 at 09:48, Christian König
mailto:christian.koe...@amd.com>>
wrote:

Correct, but a TV is intended for videos only. That's
why it implements only the lower HDMI standard.

Interlaced transmits only halve the lines with each
frame, so a 60Hz mode effectively either becomes a 30Hz
mode, halving the vertical resolution or adaptive motion
compensated which the know visual artifacts. Depending
on what the deinterlacing setting on your TV is.

You could just add a progressive 1920x540@60 or
1920x1080@30 mode manually and would have the same
effect with probably better quality. See
https://de.wikipedia.org/wiki/Deinterlacing


for reference.

If you can give us some more information what is
happening when the system freeze we could try to narrow
this down, but we can't spend much time on a very
specific use case in a driver which is in maintenance mode.

Regards,
Christian.

Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:

24fps is intended for video only. Anything interactive
at 24fps, as just moving the mouse around, is extremely
choppy.

No way anyone would prefer that over an interlaced
resolution or a lower resolution. That is, by far, the
worst option.

Just try it on your screen, set it to 24Hz or alike,
and tell me your experience. You can't even tell where
the mouse is going to go.

On Wed, 31 Mar 2021 at 08:44, Christian König
mailto:christian.koe...@amd.com>> wrote:

Hi Alberto,

well a frame rate of 24Hz is perfectly reasonable
for a TV and desktop usage.

This is probably caused by the TVs limited HDMI
bandwidth and a refresh rate of 30/25 Hz for the
interlaced mode isn't much better either.

Regards,
Christian.

Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:

The frame-rate at 24Hz is extremely poor for
normal desktop usage.

If 

Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Alberto Salvia Novella
Output

.

On Wed, 31 Mar 2021 at 16:36, Christian König 
wrote:

> Mhm strange.
>
> Can you get me the output of "sudo cat
> /sys/kernel/debug/dri/0/radeon_fence_info" when the problem happens?
>
> Thanks,
> Christian.
>
> Am 31.03.21 um 16:33 schrieb Alberto Salvia Novella:
>
> - The computer still replies to *ping*.
> - The *journal* shows no errors, but a few warnings
> 
> .
> - The *mouse* doesn't freeze.
>
> On Wed, 31 Mar 2021 at 10:09, Christian König 
> wrote:
>
>> Can you access the system over the network and see if there is anything
>> in the system log?
>>
>> It sounds like the display stack has crashed, but when the sound keeps
>> playing the system is most likely still responsive over network.
>>
>> Thanks,
>> Christian.
>>
>> Am 31.03.21 um 10:05 schrieb Alberto Salvia Novella:
>>
>> What happens is this simple: after a few minutes, about 6 or so, the
>> entire content of the screen stays still. In some minor situations only the
>> applications panel of KDE Plasma.
>>
>> If music is playing it continues playing, so only graphics are hung. Yet
>> in most cases the power button won't shut down the computer, as it usually
>> does.
>>
>> At least this is the case using kwin on x11, and not on wayland. It only
>> happens on "radeon" and not on Intel or "radeonhd".
>>
>> On Wed, 31 Mar 2021 at 09:48, Christian König 
>> wrote:
>>
>>> Correct, but a TV is intended for videos only. That's why it implements
>>> only the lower HDMI standard.
>>>
>>> Interlaced transmits only half the lines with each frame, so a 60Hz
>>> mode effectively either becomes a 30Hz mode, halving the vertical
>>> resolution, or uses adaptive motion compensation, with the known visual artifacts,
>>> depending on what the deinterlacing setting on your TV is.
>>>
>>> You could just add a progressive 1920x540@60 or 1920x1080@30 mode
>>> manually and would have the same effect with probably better quality. See
>>> https://de.wikipedia.org/wiki/Deinterlacing
>>> 
>>> for reference.
>>>
>>> If you can give us some more information what is happening when the
>>> system freeze we could try to narrow this down, but we can't spend much
>>> time on a very specific use case in a driver which is in maintenance mode.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:
>>>
>>> 24fps is intended for video only. Anything interactive at 24fps, as just
>>> moving the mouse around, is extremely choppy.
>>>
>>> No way anyone would prefer that over an interlaced resolution or a lower
>>> resolution. That is, by far, the worst option.
>>>
>>> Just try it on your screen, set it to 24Hz or alike, and tell me your
>>> experience. You can't even tell where the mouse is going to go.
>>>
>>> On Wed, 31 Mar 2021 at 08:44, Christian König 
>>> wrote:
>>>
 Hi Alberto,

 well a frame rate of 24Hz is perfectly reasonable for a TV and desktop
 usage.

 This is probably caused by the TVs limited HDMI bandwidth and a refresh
 rate of 30/25 Hz for the interlaced mode isn't much better either.

 Regards,
 Christian.

 Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:

 The frame-rate at 24Hz is extremely poor for normal desktop usage.

 If the highest resolution, aka 1080p, uses that refresh rate then the
 desktop will default to that frame-rate.

 Other progressive modes don't exhibit any issue.

 On Tue, 30 Mar 2021 at 18:26, Christian König 
 wrote:

> Hi Alberto,
>
> I think the driver should only support resolutions that are
> *progressive*, but also at least of *50Hz*.
>
>
> Why do you think so?, the 24Hz resolution seems to be the native one
> of the display.
>
> Regards,
> Christian.
>
> Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:
>
> This is why I'm using interlaced:
>
> $ *xrandr*
> Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
> DisplayPort-0 disconnected (normal left inverted right x axis y axis)
> 

Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Alberto Salvia Novella
- The computer still replies to *ping*.
- The *journal* shows no errors, but a few warnings

.
- The *mouse* doesn't freeze.

On Wed, 31 Mar 2021 at 10:09, Christian König 
wrote:

> Can you access the system over the network and see if there is anything in
> the system log?
>
> It sounds like the display stack has crashed, but when the sound keeps
> playing the system is most likely still responsive over network.
>
> Thanks,
> Christian.
>
> Am 31.03.21 um 10:05 schrieb Alberto Salvia Novella:
>
> What happens is this simple: after a few minutes, about 6 or so, the
> entire content of the screen stays still. In some minor situations only the
> applications panel of KDE Plasma.
>
> If music is playing it continues playing, so only graphics are hung. Yet
> in most cases the power button won't shut down the computer, as it usually
> does.
>
> At least this is the case using kwin on x11, and not on wayland. It only
> happens on "radeon" and not on Intel or "radeonhd".
>
> On Wed, 31 Mar 2021 at 09:48, Christian König 
> wrote:
>
>> Correct, but a TV is intended for videos only. That's why it implements
>> only the lower HDMI standard.
>>
>> Interlaced transmits only half the lines with each frame, so a 60Hz mode
>> effectively either becomes a 30Hz mode, halving the vertical resolution, or
>> is adaptively motion compensated with the known visual artifacts, depending
>> on what the deinterlacing setting on your TV is.
>>
>> You could just add a progressive 1920x540@60 or 1920x1080@30 mode
>> manually and would have the same effect with probably better quality. See
>> https://de.wikipedia.org/wiki/Deinterlacing
>> 
>> for reference.
>>
>> If you can give us some more information what is happening when the
>> system freeze we could try to narrow this down, but we can't spend much
>> time on a very specific use case in a driver which is in maintenance mode.
>>
>> Regards,
>> Christian.
>>
>> Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:
>>
>> 24fps is intended for video only. Anything interactive at 24fps, as just
>> moving the mouse around, is extremely choppy.
>>
>> No way anyone would prefer that over an interlaced resolution or a lower
>> resolution. That is, by far, the worst option.
>>
>> Just try it on your screen, set it to 24Hz or alike, and tell me your
>> experience. You can't even tell where the mouse is going to go.
>>
>> On Wed, 31 Mar 2021 at 08:44, Christian König 
>> wrote:
>>
>>> Hi Alberto,
>>>
>>> well a frame rate of 24Hz is perfectly reasonable for a TV and desktop
>>> usage.
>>>
>>> This is probably caused by the TVs limited HDMI bandwidth and a refresh
>>> rate of 30/25 Hz for the interlaced mode isn't much better either.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:
>>>
>>> The frame-rate at 24Hz is extremely poor for normal desktop usage.
>>>
>>> If the highest resolution, aka 1080p, uses that refresh rate then the
>>> desktop will default to that frame-rate.
>>>
>>> Other progressive modes don't exhibit any issue.
>>>
>>> On Tue, 30 Mar 2021 at 18:26, Christian König 
>>> wrote:
>>>
 Hi Alberto,

 I think the driver should only support resolutions that are
 *progressive*, but also at least of *50Hz*.


 Why do you think so?, the 24Hz resolution seems to be the native one of
 the display.

 Regards,
 Christian.

 Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:

 This is why I'm using interlaced:

 $ *xrandr*
 Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
 DisplayPort-0 disconnected (normal left inverted right x axis y axis)
 HDMI-0 connected primary 1920x1080+0+0 (normal left inverted right x
 axis y axis) 16mm x 9mm
1920x*1080i*60.00*+  50.0059.94
1920x1080 *24.00*23.98
1280x*720*  60.0050.0059.94
1024x768  75.0370.0760.00
832x624   74.55
800x600   72.1975.0060.3256.25
720x576   50.00
720x576i  50.00
720x480   60.0059.94
720x480i  60.0059.94
640x480   75.0072.8166.6760.0059.94
720x400   70.08
 DVI-0 disconnected (normal left inverted right x axis y axis)

 I think the driver should only support resolutions that are
 *progressive*, but also at least of *50Hz*.

 On Tue, 30 Mar 2021 at 15:41, Christian König <
 

Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Christian König

Mhm strange.

Can you get me the output of "sudo cat 
/sys/kernel/debug/dri/0/radeon_fence_info" when the problem happens?


Thanks,
Christian.

Am 31.03.21 um 16:33 schrieb Alberto Salvia Novella:

- The computer still replies to *ping*.
- The *journal* shows no errors, but a few warnings 
.

- The *mouse* doesn't freeze.

On Wed, 31 Mar 2021 at 10:09, Christian König 
mailto:christian.koe...@amd.com>> wrote:


Can you access the system over the network and see if there is
anything in the system log?

It sounds like the display stack has crashed, but when the sound
keeps playing the system is most likely still responsive over network.

Thanks,
Christian.

Am 31.03.21 um 10:05 schrieb Alberto Salvia Novella:

What happens is this simple: after a few minutes, about 6 or so,
the entire content of the screen stays still. In some minor
situations only the applications panel of KDE Plasma.

If music is playing it continues playing, so only graphics are
hung. Yet in most cases the power button won't shut down the
computer, as it usually does.

At least this is the case using kwin on x11, and not on wayland.
It only happens on "radeon" and not on Intel or "radeonhd".

On Wed, 31 Mar 2021 at 09:48, Christian König
mailto:christian.koe...@amd.com>> wrote:

Correct, but a TV is intended for videos only. That's why it
implements only the lower HDMI standard.

Interlaced transmits only halve the lines with each frame, so
a 60Hz mode effectively either becomes a 30Hz mode, halving
the vertical resolution or adaptive motion compensated which
the know visual artifacts. Depending on what the
deinterlacing setting on your TV is.

You could just add a progressive 1920x540@60 or 1920x1080@30
mode manually and would have the same effect with probably
better quality. See
https://de.wikipedia.org/wiki/Deinterlacing


for reference.

If you can give us some more information what is happening
when the system freeze we could try to narrow this down, but
we can't spend much time on a very specific use case in a
driver which is in maintenance mode.

Regards,
Christian.

Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:

24fps is intended for video only. Anything interactive at
24fps, as just moving the mouse around, is extremely choppy.

No way anyone would prefer that over an interlaced
resolution or a lower resolution. That is, by far, the worst
option.

Just try it on your screen, set it to 24Hz or alike, and
tell me your experience. You can't even tell where the mouse
is going to go.

On Wed, 31 Mar 2021 at 08:44, Christian König
mailto:christian.koe...@amd.com>>
wrote:

Hi Alberto,

well a frame rate of 24Hz is perfectly reasonable for a
TV and desktop usage.

This is probably caused by the TVs limited HDMI
bandwidth and a refresh rate of 30/25 Hz for the
interlaced mode isn't much better either.

Regards,
Christian.

Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:

The frame-rate at 24Hz is extremely poor for normal
desktop usage.

If the highest resolution, aka 1080p, uses that refresh
rate then the desktop will default to that frame-rate.

Other progressive modes don't exhibit any issue.

On Tue, 30 Mar 2021 at 18:26, Christian König
mailto:christian.koe...@amd.com>> wrote:

Hi Alberto,


I think the driver should only support resolutions
that are *progressive*, but also at least of *50Hz*.


Why do you think so?, the 24Hz resolution seems to
be the native one of the display.

Regards,
Christian.

Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:


Re: [PATCH] drm/amdgpu: drop some unused atombios functions

2021-03-31 Thread Christian König

Am 31.03.21 um 16:06 schrieb Alex Deucher:

These were leftover from the old CI dpm code which was
retired a while ago.

Signed-off-by: Alex Deucher 


Acked-by: Christian König 

BTW: Wasn't there a way to get the linker to complain about unused symbols?

E.g. unused function, global variables etc which are only declared but 
never referenced.


Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 151 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h |  12 --
  2 files changed, 163 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index ca7bdbdbf302..494b2e1717d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1232,157 +1232,6 @@ int 
amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *
return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, 
leakage_idx, voltage);
  }
  
-int amdgpu_atombios_get_leakage_id_from_vbios(struct amdgpu_device *adev,

- u16 *leakage_id)
-{
-   union set_voltage args;
-   int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
-   u8 frev, crev;
-
-   if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, 
, ))
-   return -EINVAL;
-
-   switch (crev) {
-   case 3:
-   case 4:
-   args.v3.ucVoltageType = 0;
-   args.v3.ucVoltageMode = ATOM_GET_LEAKAGE_ID;
-   args.v3.usVoltageLevel = 0;
-
-   amdgpu_atom_execute_table(adev->mode_info.atom_context, index, 
(uint32_t *));
-
-   *leakage_id = le16_to_cpu(args.v3.usVoltageLevel);
-   break;
-   default:
-   DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
-int amdgpu_atombios_get_leakage_vddc_based_on_leakage_params(struct 
amdgpu_device *adev,
-u16 *vddc, u16 
*vddci,
-u16 
virtual_voltage_id,
-u16 
vbios_voltage_id)
-{
-   int index = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo);
-   u8 frev, crev;
-   u16 data_offset, size;
-   int i, j;
-   ATOM_ASIC_PROFILING_INFO_V2_1 *profile;
-   u16 *leakage_bin, *vddc_id_buf, *vddc_buf, *vddci_id_buf, *vddci_buf;
-
-   *vddc = 0;
-   *vddci = 0;
-
-   if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, 
,
-   , , _offset))
-   return -EINVAL;
-
-   profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
-   (adev->mode_info.atom_context->bios + data_offset);
-
-   switch (frev) {
-   case 1:
-   return -EINVAL;
-   case 2:
-   switch (crev) {
-   case 1:
-   if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
-   return -EINVAL;
-   leakage_bin = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usLeakageBinArrayOffset));
-   vddc_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
-   vddc_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
-   vddci_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
-   vddci_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
-
-   if (profile->ucElbVDDC_Num > 0) {
-   for (i = 0; i < profile->ucElbVDDC_Num; i++) {
-   if (vddc_id_buf[i] == 
virtual_voltage_id) {
-   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {
-   if (vbios_voltage_id <= 
leakage_bin[j]) {
-   *vddc = vddc_buf[j 
* profile->ucElbVDDC_Num + i];
-   break;
-   }
-   }
-   break;
-

[PATCH] drm/amdgpu: drop some unused atombios functions

2021-03-31 Thread Alex Deucher
These were leftover from the old CI dpm code which was
retired a while ago.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 151 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h |  12 --
 2 files changed, 163 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index ca7bdbdbf302..494b2e1717d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1232,157 +1232,6 @@ int 
amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *
return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, 
leakage_idx, voltage);
 }
 
-int amdgpu_atombios_get_leakage_id_from_vbios(struct amdgpu_device *adev,
- u16 *leakage_id)
-{
-   union set_voltage args;
-   int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
-   u8 frev, crev;
-
-   if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, 
, ))
-   return -EINVAL;
-
-   switch (crev) {
-   case 3:
-   case 4:
-   args.v3.ucVoltageType = 0;
-   args.v3.ucVoltageMode = ATOM_GET_LEAKAGE_ID;
-   args.v3.usVoltageLevel = 0;
-
-   amdgpu_atom_execute_table(adev->mode_info.atom_context, index, 
(uint32_t *));
-
-   *leakage_id = le16_to_cpu(args.v3.usVoltageLevel);
-   break;
-   default:
-   DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
-int amdgpu_atombios_get_leakage_vddc_based_on_leakage_params(struct 
amdgpu_device *adev,
-u16 *vddc, u16 
*vddci,
-u16 
virtual_voltage_id,
-u16 
vbios_voltage_id)
-{
-   int index = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo);
-   u8 frev, crev;
-   u16 data_offset, size;
-   int i, j;
-   ATOM_ASIC_PROFILING_INFO_V2_1 *profile;
-   u16 *leakage_bin, *vddc_id_buf, *vddc_buf, *vddci_id_buf, *vddci_buf;
-
-   *vddc = 0;
-   *vddci = 0;
-
-   if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, 
,
-   , , _offset))
-   return -EINVAL;
-
-   profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
-   (adev->mode_info.atom_context->bios + data_offset);
-
-   switch (frev) {
-   case 1:
-   return -EINVAL;
-   case 2:
-   switch (crev) {
-   case 1:
-   if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
-   return -EINVAL;
-   leakage_bin = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usLeakageBinArrayOffset));
-   vddc_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
-   vddc_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
-   vddci_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
-   vddci_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
-
-   if (profile->ucElbVDDC_Num > 0) {
-   for (i = 0; i < profile->ucElbVDDC_Num; i++) {
-   if (vddc_id_buf[i] == 
virtual_voltage_id) {
-   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {
-   if (vbios_voltage_id <= 
leakage_bin[j]) {
-   *vddc = 
vddc_buf[j * profile->ucElbVDDC_Num + i];
-   break;
-   }
-   }
-   break;
-   }
-   }
-   }
-   if (profile->ucElbVDDCI_Num > 0) {
-   for (i = 0; i < profile->ucElbVDDCI_Num; i++) {
-   if 

[PATCH][next] drm/amd/display: remove redundant initialization of variable status

2021-03-31 Thread Colin King
From: Colin Ian King 

The variable status is being initialized with a value that is
never read and it is being updated later with a new value.
The initialization is redundant and can be removed. Also clean
up an indentation.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index b092627bd661..4c226db777dc 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1729,12 +1729,11 @@ bool perform_link_training_with_retries(
dc_link_dp_perform_link_training_skip_aux(link, 
link_setting);
return true;
} else {
-   enum link_training_result status = 
LINK_TRAINING_CR_FAIL_LANE0;
+   enum link_training_result status;
 
-   status = dc_link_dp_perform_link_training(
-   
link,
-   
link_setting,
-   
skip_video_pattern);
+   status = dc_link_dp_perform_link_training(link,
+ link_setting,
+ 
skip_video_pattern);
if (status == LINK_TRAINING_SUCCESS)
return true;
}
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd: cleanup coding style a bit

2021-03-31 Thread Bernard Zhao
Fix patch check warning:
WARNING: suspect code indent for conditional statements (8, 17)
+   if (obj && obj->use < 0) {
+DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", 
obj->head.name);

WARNING: braces {} are not necessary for single statement blocks
+   if (obj && obj->use < 0) {
+DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", 
obj->head.name);
+   }

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1fb2a91ad30a..43d17b72c265 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -449,11 +449,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 
 static inline void put_obj(struct ras_manager *obj)
 {
-   if (obj && --obj->use == 0)
+   if (obj && (--obj->use == 0))
list_del(>node);
-   if (obj && obj->use < 0) {
-DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", 
obj->head.name);
-   }
+   if (obj && (obj->use < 0))
+   DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name);
 }
 
 /* make one obj and return it. */
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] amd/amdgpu: code refactoring to clean code style a bit

2021-03-31 Thread Bernard Zhao
Fix checkpatch.pl warning:
Too many leading tabs - consider code refactoring
WARNING: Too many leading tabs - consider code refactoring
+   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {

WARNING: Too many leading tabs - consider code refactoring
+   if (vbios_voltage_id <= 
leakage_bin[j]) {

WARNING: Too many leading tabs - consider code refactoring
+   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {

WARNING: Too many leading tabs - consider code refactoring
+   if (vbios_voltage_id <= 
leakage_bin[j]) {

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 84 
 1 file changed, 35 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 86add0f4ea4d..9968ff8ddc9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1283,65 +1283,51 @@ int 
amdgpu_atombios_get_leakage_vddc_based_on_leakage_params(struct amdgpu_devic
profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
(adev->mode_info.atom_context->bios + data_offset);
 
-   switch (frev) {
-   case 1:
+   if ((frev != 2) || (crev != 1)) {
+   DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
return -EINVAL;
-   case 2:
-   switch (crev) {
-   case 1:
-   if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
-   return -EINVAL;
-   leakage_bin = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usLeakageBinArrayOffset));
-   vddc_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
-   vddc_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
-   vddci_id_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
-   vddci_buf = (u16 *)
-   (adev->mode_info.atom_context->bios + 
data_offset +
-
le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
-
-   if (profile->ucElbVDDC_Num > 0) {
-   for (i = 0; i < profile->ucElbVDDC_Num; i++) {
-   if (vddc_id_buf[i] == 
virtual_voltage_id) {
-   for (j = 0; j < 
profile->ucLeakageBinNum; j++) {
-   if (vbios_voltage_id <= 
leakage_bin[j]) {
-   *vddc = 
vddc_buf[j * profile->ucElbVDDC_Num + i];
-   break;
-   }
-   }
+   }
+
+   if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
+   return -EINVAL;
+
+   leakage_bin = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usLeakageBinArrayOffset));
+   vddc_id_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
+   vddc_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
+   vddci_id_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset 
+
+le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
+   vddci_buf = (u16 *)(adev->mode_info.atom_context->bios + data_offset +
+le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
+
+   if (profile->ucElbVDDC_Num > 0) {
+   for (i = 0; i < profile->ucElbVDDC_Num; i++) {
+   if (vddc_id_buf[i] == virtual_voltage_id) {
+   for (j = 0; j < profile->ucLeakageBinNum; j++) {
+   if (vbios_voltage_id <= leakage_bin[j]) 
{
+   *vddc = vddc_buf[j * 
profile->ucElbVDDC_Num + i];
break;
}
}
+   break;
}
-  

Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Alberto Salvia Novella
What happens is this simple: after a few minutes, about 6 or so, the entire
content of the screen stays still. In some minor situations only the
applications panel of KDE Plasma.

If music is playing it continues playing, so only graphics are hung. Yet in
most cases the power button won't shut down the computer, as it usually
does.

At least this is the case using kwin on x11, and not on wayland. It only
happens on "radeon" and not on Intel or "radeonhd".

On Wed, 31 Mar 2021 at 09:48, Christian König 
wrote:

> Correct, but a TV is intended for videos only. That's why it implements
> only the lower HDMI standard.
>
> Interlaced transmits only half the lines with each frame, so a 60Hz mode
> effectively either becomes a 30Hz mode, halving the vertical resolution, or
> is adaptively motion compensated with the known visual artifacts, depending
> on what the deinterlacing setting on your TV is.
>
> You could just add a progressive 1920x540@60 or 1920x1080@30 mode
> manually and would have the same effect with probably better quality. See
> https://de.wikipedia.org/wiki/Deinterlacing for reference.
>
> If you can give us some more information about what is happening when the
> system freezes we could try to narrow this down, but we can't spend much time
> on a very specific use case in a driver which is in maintenance mode.
>
> Regards,
> Christian.
>
> Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:
>
> 24fps is intended for video only. Anything interactive at 24fps, as just
> moving the mouse around, is extremely choppy.
>
> No way anyone would prefer that over an interlaced resolution or a lower
> resolution. That is, by far, the worst option.
>
> Just try it on your screen, set it to 24Hz or alike, and tell me your
> experience. You can't even tell where the mouse is going to go.
>
> On Wed, 31 Mar 2021 at 08:44, Christian König 
> wrote:
>
>> Hi Alberto,
>>
>> well a frame rate of 24Hz is perfectly reasonable for a TV and desktop
>> usage.
>>
>> This is probably caused by the TVs limited HDMI bandwidth and a refresh
>> rate of 30/25 Hz for the interlaced mode isn't much better either.
>>
>> Regards,
>> Christian.
>>
>> Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:
>>
>> The frame-rate at 24Hz is extremely poor for normal desktop usage.
>>
>> If the highest resolution, aka 1080p, uses that refresh rate then the
>> desktop will default to that frame-rate.
>>
>> Other progressive modes don't exhibit any issue.
>>
>> On Tue, 30 Mar 2021 at 18:26, Christian König 
>> wrote:
>>
>>> Hi Alberto,
>>>
>>> I think the driver should only support resolutions that are
>>> *progressive*, but also at least of *50Hz*.
>>>
>>>
>>> Why do you think so?, the 24Hz resolution seems to be the native one of
>>> the display.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:
>>>
>>> This is why I'm using interlaced:
>>>
>>> $ *xrandr*
>>> Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
>>> DisplayPort-0 disconnected (normal left inverted right x axis y axis)
>>> HDMI-0 connected primary 1920x1080+0+0 (normal left inverted right x
>>> axis y axis) 16mm x 9mm
>>>1920x*1080i*60.00*+  50.0059.94
>>>1920x1080 *24.00*23.98
>>>1280x*720*  60.0050.0059.94
>>>1024x768  75.0370.0760.00
>>>832x624   74.55
>>>800x600   72.1975.0060.3256.25
>>>720x576   50.00
>>>720x576i  50.00
>>>720x480   60.0059.94
>>>720x480i  60.0059.94
>>>640x480   75.0072.8166.6760.0059.94
>>>720x400   70.08
>>> DVI-0 disconnected (normal left inverted right x axis y axis)
>>>
>>> I think the driver should only support resolutions that are
>>> *progressive*, but also at least of *50Hz*.
>>>
>>> On Tue, 30 Mar 2021 at 15:41, Christian König <
>>> ckoenig.leichtzumer...@gmail.com> wrote:
>>>
 Mhm, no idea why an interlaced resolution would cause a crash. Maybe
 some miscalculation in the display code.

 But apart from that if you just connected your PC to a TV I also
 wouldn't recommend using an interlaced resolution in the first place.

 See those resolutions only exists for backward compatibility with
 analog hardware.

 I think we would just disable those modes instead of searching for the
 bug.

 Regards,
 Christian.

 Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:

 I guessed so.

 The GPU is a Radeon HD5870, and the screen is an old Telefunken TV
 (TLFK22LEDPVR1).

 Since my real display got into repair I used this TV meanwhile, and to
 my surprise it froze the system.

 On Tue, 30 Mar 2021 at 10:15, Christian König 
 wrote:

> Hi Alberto,
>
> well what hardware do you have?
>
> Interlaced resolutions are not used any more on modern hardware, so
> they
> are not well tested.
>
> Regards,
> 

[PATCH] drm/amd: use kmalloc_array over kmalloc with multiply

2021-03-31 Thread Bernard Zhao
Fix patch check warning:
WARNING: Prefer kmalloc_array over kmalloc with multiply
+   buf = kmalloc(MAX_KFIFO_SIZE * sizeof(*buf), GFP_KERNEL);

Signed-off-by: Bernard Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 17d1736367ea..246522423559 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -81,7 +81,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char 
__user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
 
-   buf = kmalloc(MAX_KFIFO_SIZE * sizeof(*buf), GFP_KERNEL);
+   buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
if (!buf)
return -ENOMEM;
 
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 0/4] drm/amd/display: Base changes for isolating FPU operation in a single place

2021-03-31 Thread Christian König

Hi Rodrigo,

I'm not so happy about the whole recursion thing, but I think that is 
something which can be worked on later on.


Apart from that the approach sounds solid to me.

Regards,
Christian.

Am 31.03.21 um 14:24 schrieb Rodrigo Siqueira:

Hi,

In the display core, we utilize floats and doubles units for calculating
modesetting parameters. One side effect of our approach to use double-precision
is the fact that we spread multiple FPU accesses across our driver, which means
that we can accidentally clobber user space FPU state.

# Challenges

1. Keep in mind that this FPU code is ingrained in our display driver and
performs several crucial tasks. Additionally, we already have multiple
architectures available in the kernel and a large set of users; in other words,
we prefer to avoid a radical approach that might break our user's system.

2. We share our display code with Windows; thus, we need to maintain the
interoperability between these two systems.

3. We need a mechanism for identifying which function uses FPU registers;
fortunately, Peter Zijlstra wrote a series a couple of months ago where he
introduced an FPU check for objtool. I used the following command for
identifying the potential FPU usage:

  ./tools/objtool/objtool check -Ffa "drivers/gpu/drm/amd/display/dc/ANY_FILE.o"

4. Since our code heavily relies on FPU and the fact that we spread
kernel_fpu_begin/end across multiple functions, we can have some complex
scenarios that will require code refactoring. However, we want to avoid
complicated changes since this is a formula to introduce regressions; we want
something that allows us to fix it in small, safe, and reliable steps.

# Our approach

For trying to solve this problem, we came up with the following strategy:

1. Keep in mind that we are using kernel_fpu_begin/end spread in various areas
and sometimes across multiple functions. If we try to move some of the
functions to an isolated place, we can generate a situation where we can call
the FPU protection more than once, causing multiple warnings. We can deal with
this problem by adding a thin management layer around the kernel_fpu_begin/end
used inside the display.

2. We will need a trace mechanism for this FPU management inside our display
code.

3. After we get the thin layer that manages FPU, we can start to move each
function that uses FPU to the centralized place. Our DQE runs multiple tests in
different ASICs every week; we can take advantage of this to ensure that our
FPU patches do not introduce any regressions. The idea is to work on a
specific part of the code every week (e.g., week 1: DCN2, week 2: DCN2.1,
etc.).

4. Finally, after we can isolate the FPU operations in a single place, we can
altogether remove the FPU flags from other files and eliminate an unnecessary
code introduced to deal with this problem.

# This series

To maintain the interoperability between multiple OSes, we already have a
define named DC_FP_START/END, which is a straightforward wrapper to
kernel_fpu_begin/end in the Linux side. In this series, I decided to expand the
scope of this DC_FP_* wrapper to trace FPU entrance and exit in the display
code, but I also add a mechanism for managing the entrance and exit of
kernel_fpu_begin/end. You can see the details on how I did that in the last two
patches.

I also isolate a simple function that requires FPU access to demonstrate my
strategy for isolating this FPU access in a single place. If this series gets
accepted, the following steps consist of moving all FPU functions weekly until
we isolate everything in the fpu_operation folder.

Best Regards
Rodrigo Siqueira


Rodrigo Siqueira (4):
   drm/amd/display: Introduce FPU directory inside DC
   drm/amd/display: Add FPU event trace
   drm/amd/display: Add ref count control for FPU utilization
   drm/amd/display: Add DC_FP helper to check FPU state

  .../gpu/drm/amd/display/amdgpu_dm/Makefile|   3 +-
  .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   |  24 
  .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 111 ++
  .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h|  34 ++
  drivers/gpu/drm/amd/display/dc/Makefile   |   1 +
  drivers/gpu/drm/amd/display/dc/dc_trace.h |   3 +
  .../drm/amd/display/dc/dcn20/dcn20_resource.c |  41 +--
  .../drm/amd/display/dc/dcn20/dcn20_resource.h |   2 -
  .../drm/amd/display/dc/dcn21/dcn21_resource.c |   2 +
  .../amd/display/dc/fpu_operations/Makefile|  58 +
  .../drm/amd/display/dc/fpu_operations/dcn2x.c | 106 +
  .../drm/amd/display/dc/fpu_operations/dcn2x.h |  33 ++
  drivers/gpu/drm/amd/display/dc/os_types.h |   6 +-
  13 files changed, 381 insertions(+), 43 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
  create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
  create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/Makefile
  create mode 100644 

Re: [PATCH 4/4] drm/amd/display: Add DC_FP helper to check FPU state

2021-03-31 Thread Christian König

Am 31.03.21 um 14:25 schrieb Rodrigo Siqueira:

To fully isolate FPU operations in a single place, we must avoid
situations where compilers spill FP values to registers due to FP enable
in a specific C file. Note that even if we isolate all FPU functions in
a single file and call its interface from other files, the compiler
might enable the use of FPU before we call DC_FP_START. Nevertheless, it
is the programmer's responsibility to invoke DC_FP_START/END in the
correct place. To highlight situations where developers forgot to use
the FP protection before calling the DC FPU interface functions, we
introduce a helper that checks if the function is invoked under FP
protection. If not, it will trigger a kernel warning.

Signed-off-by: Rodrigo Siqueira 
Acked-by: Rodrigo Siqueira 
---
  .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 34 ---
  .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h|  1 +
  .../drm/amd/display/dc/dcn20/dcn20_resource.c |  2 ++
  .../drm/amd/display/dc/fpu_operations/dcn2x.c | 17 ++
  4 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 5dcefe193523..0d95f680b62b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -40,6 +40,25 @@
   */
  
  static DEFINE_PER_CPU(atomic_t, fpu_ref);

+static DEFINE_PER_CPU(atomic_t, fp_dc_enabled);
+
+/**
+ * is_fp_dc_enabled - Check if FPU protection is enabled
+ *
+ * This function tells if the code is already under FPU protection or not. A
+ * function that works as an API for a set of FPU operations can use this
+ * function for checking if the caller invoked it after DC_FP_START(). For
+ * example, take a look at dcn2x.c file.
+ *
+ * Return:
+ * Return true if we already enabled FPU protection, otherwise return false.
+ */
+inline bool is_fp_dc_enabled(void)


I would rather name this dc_is_fp_enabled() or even better directly make 
this dc_assert_fp_enabled().



+{
+   atomic_t *fp_enabled = this_cpu_ptr(&fp_dc_enabled);
+
+   return atomic_read(fp_enabled);


The handling with fp_enabled is overkill. Instead you can also check 
fpu_ref for > 1.


Regards,
Christian.


+}
  
  /**

   * dc_fpu_begin - Enables FPU protection
@@ -55,12 +74,15 @@ void dc_fpu_begin(const char *function_name, const int line)
  {
int ret;
atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+   atomic_t *fp_enabled = this_cpu_ptr(&fp_dc_enabled);
  
  	ret = atomic_inc_return(local_fpu_ref);

TRACE_DCN_FPU(true, function_name, line, ret);
  
-	if (ret == 1)

+   if (ret == 1) {
kernel_fpu_begin();
+   atomic_set(fp_enabled, 1);
+   }
  }
  
  /**

@@ -75,13 +97,15 @@ void dc_fpu_begin(const char *function_name, const int line)
   */
  void dc_fpu_end(const char *function_name, const int line)
  {
-
-   int ret;
+   bool ret;
atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+   atomic_t *fp_enabled = this_cpu_ptr(&fp_dc_enabled);
  
-	ret = atomic_dec_return(local_fpu_ref);

+   ret = atomic_dec_and_test(local_fpu_ref);
TRACE_DCN_FPU(false, function_name, line, ret);
  
-	if (!ret)

+   if (ret) {
+   atomic_set(fp_enabled, 0);
kernel_fpu_end();
+   }
  }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
index fb54983c5c60..e782dfa640bf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
@@ -27,6 +27,7 @@
  #ifndef __DC_FPU_H__
  #define __DC_FPU_H__
  
+bool is_fp_dc_enabled(void);

  void dc_fpu_begin(const char *function_name, const int line);
  void dc_fpu_end(const char *function_name, const int line);
  
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c

index b58edd012038..d0771e29c243 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -2351,7 +2351,9 @@ int dcn20_populate_dml_pipes_from_context(
}
  
  	/* populate writeback information */

+   DC_FP_START();
dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, 
pipes);
+   DC_FP_END();
  
  	return pipe_cnt;

  }
diff --git a/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c 
b/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
index 32b23a182428..1c8a97d342c0 100644
--- a/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
+++ b/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
@@ -42,6 +42,22 @@
   *that deals with FP register is contained within this call.
   * 3. All function that needs to be accessed outside this file requires a
   *public interface that not uses any FPU reference.
+ * 4. Developers should not use DC_FP_START/END in this file, but they need to
+ 

Re: [PATCH 3/4] drm/amd/display: Add ref count control for FPU utilization

2021-03-31 Thread Christian König

Am 31.03.21 um 14:25 schrieb Rodrigo Siqueira:

DC invokes DC_FPU_START/END in multiple parts of the code; this can
create a situation where we invoke this FPU operation in a nested way.
For avoiding this situation, this commit adds a mechanism where
dc_fpu_begin/end manages the access to kernel_fpu_begin/end.


In general please name this "recursion" control instead of "ref count". 
Let's call the child by its name :)




Signed-off-by: Rodrigo Siqueira 
Acked-by: Rodrigo Siqueira 
---
  .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   | 13 ---
  .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 34 ---
  drivers/gpu/drm/amd/display/dc/dc_trace.h |  4 +--
  3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index 230bb12c405e..cd4f0d3f37fb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -638,23 +638,26 @@ TRACE_EVENT(amdgpu_refresh_rate_track,
  );
  
  TRACE_EVENT(dcn_fpu,

-   TP_PROTO(bool begin, const char *function, const int line),
-   TP_ARGS(begin, function, line),
+   TP_PROTO(bool begin, const char *function, const int line, const 
int ref_count),
+   TP_ARGS(begin, function, line, ref_count),
  
  	TP_STRUCT__entry(

 __field(bool, begin)
 __field(const char *, function)
 __field(int, line)
+__field(int, ref_count)
),
TP_fast_assign(
   __entry->begin = begin;
   __entry->function = function;
   __entry->line = line;
+  __entry->ref_count = ref_count;
),
-   TP_printk("%s()+%d: %s",
+   TP_printk("%s: ref_count: %d: %s()+%d:",
+ __entry->begin ? "begin" : "end",
+ __entry->ref_count,
  __entry->function,
- __entry->line,
- __entry->begin ? "begin" : "end"
+ __entry->line
)
  );
  
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c

index ff34007509de..5dcefe193523 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -28,6 +28,19 @@
  
  #include 
  
+/**

+ * DOC: Overview
+ *
+ * DC core uses FPU operations in multiple parts of the code, which requires a
+ * more specialized way to manage these areas' entrance. To fulfill this
+ * requirement, we created some wrapper functions that encapsulate
+ * kernel_fpu_begin/end to better fit our need in the display component. In
+ * summary, in this file, you can find functions related to FPU operation
+ * management.
+ */
+
+static DEFINE_PER_CPU(atomic_t, fpu_ref);
+
  /**
   * dc_fpu_begin - Enables FPU protection
   * @function_name: A string containing the function name for debug purposes
@@ -40,8 +53,14 @@
   */
  void dc_fpu_begin(const char *function_name, const int line)
  {
-   TRACE_DCN_FPU(true, function_name, line);
-   kernel_fpu_begin();
+   int ret;
+   atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+
+   ret = atomic_inc_return(local_fpu_ref);


You need to disable preemption for this or otherwise it can be that you 
schedule to another process.


Regards,
Christian.


+   TRACE_DCN_FPU(true, function_name, line, ret);
+
+   if (ret == 1)
+   kernel_fpu_begin();
  }
  
  /**

@@ -56,6 +75,13 @@ void dc_fpu_begin(const char *function_name, const int line)
   */
  void dc_fpu_end(const char *function_name, const int line)
  {
-   TRACE_DCN_FPU(false, function_name, line);
-   kernel_fpu_end();
+
+   int ret;
+   atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+
+   ret = atomic_dec_return(local_fpu_ref);
+   TRACE_DCN_FPU(false, function_name, line, ret);
+
+   if (!ret)
+   kernel_fpu_end();
  }
diff --git a/drivers/gpu/drm/amd/display/dc/dc_trace.h 
b/drivers/gpu/drm/amd/display/dc/dc_trace.h
index d598ba697e45..c711797e5c9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_trace.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_trace.h
@@ -38,5 +38,5 @@
  #define TRACE_DCN_CLOCK_STATE(dcn_clocks) \
trace_amdgpu_dm_dc_clocks_state(dcn_clocks)
  
-#define TRACE_DCN_FPU(begin, function, line) \

-   trace_dcn_fpu(begin, function, line)
+#define TRACE_DCN_FPU(begin, function, line, ref_count) \
+   trace_dcn_fpu(begin, function, line, ref_count)


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/4] drm/amd/display: Base changes for isolating FPU operation in a single place

2021-03-31 Thread Rodrigo Siqueira
Hi,

In the display core, we utilize floats and doubles units for calculating
modesetting parameters. One side effect of our approach to use double-precision
is the fact that we spread multiple FPU access across our driver, which means
that we can accidentally clobber user space FPU state.

# Challenges

1. Keep in mind that this FPU code is ingrained in our display driver and
performs several crucial tasks. Additionally, we already have multiple
architectures available in the kernel and a large set of users; in other words,
we prefer to avoid a radical approach that might break our user's system.

2. We share our display code with Windows; thus, we need to maintain the
interoperability between these two systems.

3. We need a mechanism for identifying which function uses FPU registers;
fortunately, Peter Zijlstra wrote a series a couple of months ago where he
introduced an FPU check for objtool. I used the following command for
identifying the potential FPU usage:

 ./tools/objtool/objtool check -Ffa "drivers/gpu/drm/amd/display/dc/ANY_FILE.o"

4. Since our code heavily relies on FPU and the fact that we spread
kernel_fpu_begin/end across multiple functions, we can have some complex
scenarios that will require code refactoring. However, we want to avoid
complicated changes since this is a formula to introduce regressions; we want
something that allows us to fix it in small, safe, and reliable steps.

# Our approach

For trying to solve this problem, we came up with the following strategy:

1. Keep in mind that we are using kernel_fpu_begin/end spread in various areas
and sometimes across multiple functions. If we try to move some of the
functions to an isolated place, we can generate a situation where we can call
the FPU protection more than once, causing multiple warnings. We can deal with
this problem by adding a thin management layer around the kernel_fpu_begin/end
used inside the display.

2. We will need a trace mechanism for this FPU management inside our display
code.

3. After we get the thin layer that manages FPU, we can start to move each
function that uses FPU to the centralized place. Our DQE runs multiple tests in
different ASICs every week; we can take advantage of this to ensure that our
FPU patches work does not introduce any regression. The idea is to work on a
specific part of the code every week (e.g., week 1: DCN2, week 2: DCN2.1,
etc.).

4. Finally, after we can isolate the FPU operations in a single place, we can
altogether remove the FPU flags from other files and eliminate an unnecessary
code introduced to deal with this problem.

# This series

To maintain the interoperability between multiple OSes, we already have a
define named DC_FP_START/END, which is a straightforward wrapper to
kernel_fpu_begin/end in the Linux side. In this series, I decided to expand the
scope of this DC_FP_* wrapper to trace FPU entrance and exit in the display
code, but I also add a mechanism for managing the entrance and exit of
kernel_fpu_begin/end. You can see the details on how I did that in the last two
patches.

I also isolate a simple function that requires FPU access to demonstrate my
strategy for isolating this FPU access in a single place. If this series gets
accepted, the following steps consist of moving all FPU functions weekly until
we isolate everything in the fpu_operation folder.

Best Regards
Rodrigo Siqueira


Rodrigo Siqueira (4):
  drm/amd/display: Introduce FPU directory inside DC
  drm/amd/display: Add FPU event trace
  drm/amd/display: Add ref count control for FPU utilization
  drm/amd/display: Add DC_FP helper to check FPU state

 .../gpu/drm/amd/display/amdgpu_dm/Makefile|   3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   |  24 
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 111 ++
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h|  34 ++
 drivers/gpu/drm/amd/display/dc/Makefile   |   1 +
 drivers/gpu/drm/amd/display/dc/dc_trace.h |   3 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  41 +--
 .../drm/amd/display/dc/dcn20/dcn20_resource.h |   2 -
 .../drm/amd/display/dc/dcn21/dcn21_resource.c |   2 +
 .../amd/display/dc/fpu_operations/Makefile|  58 +
 .../drm/amd/display/dc/fpu_operations/dcn2x.c | 106 +
 .../drm/amd/display/dc/fpu_operations/dcn2x.h |  33 ++
 drivers/gpu/drm/amd/display/dc/os_types.h |   6 +-
 13 files changed, 381 insertions(+), 43 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/Makefile
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.h

-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/4] drm/amd/display: Add DC_FP helper to check FPU state

2021-03-31 Thread Rodrigo Siqueira
To fully isolate FPU operations in a single place, we must avoid
situations where compilers spill FP values to registers due to FP enable
in a specific C file. Note that even if we isolate all FPU functions in
a single file and call its interface from other files, the compiler
might enable the use of FPU before we call DC_FP_START. Nevertheless, it
is the programmer's responsibility to invoke DC_FP_START/END in the
correct place. To highlight situations where developers forgot to use
the FP protection before calling the DC FPU interface functions, we
introduce a helper that checks if the function is invoked under FP
protection. If not, it will trigger a kernel warning.

Signed-off-by: Rodrigo Siqueira 
Acked-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 34 ---
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h|  1 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  2 ++
 .../drm/amd/display/dc/fpu_operations/dcn2x.c | 17 ++
 4 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 5dcefe193523..0d95f680b62b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -40,6 +40,25 @@
  */
 
 static DEFINE_PER_CPU(atomic_t, fpu_ref);
+static DEFINE_PER_CPU(atomic_t, fp_dc_enabled);
+
+/**
+ * is_fp_dc_enabled - Check if FPU protection is enabled
+ *
+ * This function tells if the code is already under FPU protection or not. A
+ * function that works as an API for a set of FPU operations can use this
+ * function for checking if the caller invoked it after DC_FP_START(). For
+ * example, take a look at dcn2x.c file.
+ *
+ * Return:
+ * Return true if we already enabled FPU protection, otherwise return false.
+ */
+inline bool is_fp_dc_enabled(void)
+{
+   atomic_t *fp_enabled = this_cpu_ptr(&fp_dc_enabled);
+
+   return atomic_read(fp_enabled);
+}
 
 /**
  * dc_fpu_begin - Enables FPU protection
@@ -55,12 +74,15 @@ void dc_fpu_begin(const char *function_name, const int line)
 {
int ret;
atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+   atomic_t *fp_enabled = this_cpu_ptr(&fp_dc_enabled);
 
ret = atomic_inc_return(local_fpu_ref);
TRACE_DCN_FPU(true, function_name, line, ret);
 
-   if (ret == 1)
+   if (ret == 1) {
kernel_fpu_begin();
+   atomic_set(fp_enabled, 1);
+   }
 }
 
 /**
@@ -75,13 +97,15 @@ void dc_fpu_begin(const char *function_name, const int line)
  */
 void dc_fpu_end(const char *function_name, const int line)
 {
-
-   int ret;
+   bool ret;
atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+   atomic_t *fp_enabled = this_cpu_ptr(&fp_dc_enabled);
 
-   ret = atomic_dec_return(local_fpu_ref);
+   ret = atomic_dec_and_test(local_fpu_ref);
TRACE_DCN_FPU(false, function_name, line, ret);
 
-   if (!ret)
+   if (ret) {
+   atomic_set(fp_enabled, 0);
kernel_fpu_end();
+   }
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
index fb54983c5c60..e782dfa640bf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
@@ -27,6 +27,7 @@
 #ifndef __DC_FPU_H__
 #define __DC_FPU_H__
 
+bool is_fp_dc_enabled(void);
 void dc_fpu_begin(const char *function_name, const int line);
 void dc_fpu_end(const char *function_name, const int line);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index b58edd012038..d0771e29c243 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -2351,7 +2351,9 @@ int dcn20_populate_dml_pipes_from_context(
}
 
/* populate writeback information */
+   DC_FP_START();
dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, 
pipes);
+   DC_FP_END();
 
return pipe_cnt;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c 
b/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
index 32b23a182428..1c8a97d342c0 100644
--- a/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
+++ b/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
@@ -42,6 +42,22 @@
  *that deals with FP register is contained within this call.
  * 3. All function that needs to be accessed outside this file requires a
  *public interface that not uses any FPU reference.
+ * 4. Developers should not use DC_FP_START/END in this file, but they need to
+ *ensure that the caller invokes it before access any function available in
+ *this file. For this reason, public API in this file must invoke
+ *ASSERT(is_fp_dc_enabled());
+ *
+ * Let's expand a little bit more the idea in the code pattern number for. To
+ * fully 

[PATCH 2/4] drm/amd/display: Add FPU event trace

2021-03-31 Thread Rodrigo Siqueira
We don't have any mechanism for tracing FPU operations inside the
display core, making the debug work a little bit tricky. For trying to
alleviate this problem, this commit introduces a trace mechanism inside
our DC_FP_START/END macros.

Signed-off-by: Rodrigo Siqueira 
Acked-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/amdgpu_dm/Makefile|  3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   | 21 +++
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 61 +++
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h| 33 ++
 drivers/gpu/drm/amd/display/dc/dc_trace.h |  3 +
 drivers/gpu/drm/amd/display/dc/os_types.h |  6 +-
 6 files changed, 123 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile 
b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 9a3b7bf8ab0b..7d3a7c6dbba3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,7 +25,8 @@
 
 
 
-AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o
+AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o 
\
+   dc_fpu.o
 
 ifneq ($(CONFIG_DRM_AMD_DC),)
 AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index 46a33f64cf8e..230bb12c405e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -637,6 +637,27 @@ TRACE_EVENT(amdgpu_refresh_rate_track,
  __entry->refresh_rate_ns)
 );
 
+TRACE_EVENT(dcn_fpu,
+   TP_PROTO(bool begin, const char *function, const int line),
+   TP_ARGS(begin, function, line),
+
+   TP_STRUCT__entry(
+__field(bool, begin)
+__field(const char *, function)
+__field(int, line)
+   ),
+   TP_fast_assign(
+  __entry->begin = begin;
+  __entry->function = function;
+  __entry->line = line;
+   ),
+   TP_printk("%s()+%d: %s",
+ __entry->function,
+ __entry->line,
+ __entry->begin ? "begin" : "end"
+   )
+);
+
 #endif /* _AMDGPU_DM_TRACE_H_ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
new file mode 100644
index ..ff34007509de
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dc_trace.h"
+
+#include 
+
+/**
+ * dc_fpu_begin - Enables FPU protection
+ * @function_name: A string containing the function name for debug purposes
+ * @line: A-line number where DC_FP_START was invoked for debug purpose
+ *
+ * This function is responsible for managing the use of kernel_fpu_begin() with
+ * the advantage of providing an event trace for debugging.
+ *
+ * Note: Do not call this function directly; always use DC_FP_START().
+ */
+void dc_fpu_begin(const char *function_name, const int line)
+{
+   TRACE_DCN_FPU(true, function_name, line);
+   kernel_fpu_begin();
+}
+
+/**
+ * dc_fpu_end - Disable FPU protection
+ * @function_name: A string containing the function name for debug purposes
+ * @line: A-line number where DC_FP_END was invoked for debug purpose
+ *
+ * This function is responsible for managing the use of kernel_fpu_end() with
+ * the advantage of providing an 

[PATCH 3/4] drm/amd/display: Add ref count control for FPU utilization

2021-03-31 Thread Rodrigo Siqueira
DC invokes DC_FPU_START/END in multiple parts of the code; this can
create a situation where we invoke this FPU operation in a nested way.
For avoiding this situation, this commit adds a mechanism where
dc_fpu_begin/end manages the access to kernel_fpu_begin/end.

Signed-off-by: Rodrigo Siqueira 
Acked-by: Rodrigo Siqueira 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   | 13 ---
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 34 ---
 drivers/gpu/drm/amd/display/dc/dc_trace.h |  4 +--
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index 230bb12c405e..cd4f0d3f37fb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -638,23 +638,26 @@ TRACE_EVENT(amdgpu_refresh_rate_track,
 );
 
 TRACE_EVENT(dcn_fpu,
-   TP_PROTO(bool begin, const char *function, const int line),
-   TP_ARGS(begin, function, line),
+   TP_PROTO(bool begin, const char *function, const int line, const 
int ref_count),
+   TP_ARGS(begin, function, line, ref_count),
 
TP_STRUCT__entry(
 __field(bool, begin)
 __field(const char *, function)
 __field(int, line)
+__field(int, ref_count)
),
TP_fast_assign(
   __entry->begin = begin;
   __entry->function = function;
   __entry->line = line;
+  __entry->ref_count = ref_count;
),
-   TP_printk("%s()+%d: %s",
+   TP_printk("%s: ref_count: %d: %s()+%d:",
+ __entry->begin ? "begin" : "end",
+ __entry->ref_count,
  __entry->function,
- __entry->line,
- __entry->begin ? "begin" : "end"
+ __entry->line
)
 );
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index ff34007509de..5dcefe193523 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -28,6 +28,19 @@
 
 #include 
 
+/**
+ * DOC: Overview
+ *
+ * DC core uses FPU operations in multiple parts of the code, which requires a
+ * more specialized way to manage these areas' entrance. To fulfill this
+ * requirement, we created some wrapper functions that encapsulate
+ * kernel_fpu_begin/end to better fit our need in the display component. In
+ * summary, in this file, you can find functions related to FPU operation
+ * management.
+ */
+
+static DEFINE_PER_CPU(atomic_t, fpu_ref);
+
 /**
  * dc_fpu_begin - Enables FPU protection
  * @function_name: A string containing the function name for debug purposes
@@ -40,8 +53,14 @@
  */
 void dc_fpu_begin(const char *function_name, const int line)
 {
-   TRACE_DCN_FPU(true, function_name, line);
-   kernel_fpu_begin();
+   int ret;
+   atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+
+   ret = atomic_inc_return(local_fpu_ref);
+   TRACE_DCN_FPU(true, function_name, line, ret);
+
+   if (ret == 1)
+   kernel_fpu_begin();
 }
 
 /**
@@ -56,6 +75,13 @@ void dc_fpu_begin(const char *function_name, const int line)
  */
 void dc_fpu_end(const char *function_name, const int line)
 {
-   TRACE_DCN_FPU(false, function_name, line);
-   kernel_fpu_end();
+
+   int ret;
+   atomic_t *local_fpu_ref = this_cpu_ptr(&fpu_ref);
+
+   ret = atomic_dec_return(local_fpu_ref);
+   TRACE_DCN_FPU(false, function_name, line, ret);
+
+   if (!ret)
+   kernel_fpu_end();
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dc_trace.h 
b/drivers/gpu/drm/amd/display/dc/dc_trace.h
index d598ba697e45..c711797e5c9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_trace.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_trace.h
@@ -38,5 +38,5 @@
 #define TRACE_DCN_CLOCK_STATE(dcn_clocks) \
trace_amdgpu_dm_dc_clocks_state(dcn_clocks)
 
-#define TRACE_DCN_FPU(begin, function, line) \
-   trace_dcn_fpu(begin, function, line)
+#define TRACE_DCN_FPU(begin, function, line, ref_count) \
+   trace_dcn_fpu(begin, function, line, ref_count)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amd/display: Introduce FPU directory inside DC

2021-03-31 Thread Rodrigo Siqueira
The display core files rely on FPU operation, which requires to be
compiled with special flags. Ideally, we don't want these FPU operations
to get spread around the DC code; nevertheless, it happens in the
current source. This commit introduces a new directory named
fpu_operations that intends to centralize all files that require the FPU
compilation flag. As part of this new component, this patch also moves
one of the functions that require FPU access to a single shared file.
Notice that this is the first part of the work, and it does not fix the
FPU issue yet; we still need other patches for achieving the complete
isolation of this file.

Signed-off-by: Rodrigo Siqueira 
Acked-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/Makefile   |  1 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c | 39 +---
 .../drm/amd/display/dc/dcn20/dcn20_resource.h |  2 -
 .../drm/amd/display/dc/dcn21/dcn21_resource.c |  2 +
 .../amd/display/dc/fpu_operations/Makefile| 58 
 .../drm/amd/display/dc/fpu_operations/dcn2x.c | 89 +++
 .../drm/amd/display/dc/fpu_operations/dcn2x.h | 33 +++
 7 files changed, 185 insertions(+), 39 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/Makefile
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.h

diff --git a/drivers/gpu/drm/amd/display/dc/Makefile 
b/drivers/gpu/drm/amd/display/dc/Makefile
index f33847299bca..7d5b70ed5aca 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -35,6 +35,7 @@ DC_LIBS += dcn301
 DC_LIBS += dcn302
 endif
 
+DC_LIBS += fpu_operations
 DC_LIBS += dce120
 
 DC_LIBS += dce112
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 8fb29f754e44..b58edd012038 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -35,6 +35,8 @@
 #include "include/irq_service_interface.h"
 #include "dcn20/dcn20_resource.h"
 
+#include "fpu_operations/dcn2x.h"
+
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
 #include "dcn20_hubbub.h"
@@ -1974,43 +1976,6 @@ void dcn20_split_stream_for_mpc(
ASSERT(primary_pipe->plane_state);
 }
 
-void dcn20_populate_dml_writeback_from_context(
-   struct dc *dc, struct resource_context *res_ctx, 
display_e2e_pipe_params_st *pipes)
-{
-   int pipe_cnt, i;
-
-   for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
-   struct dc_writeback_info *wb_info = 
&res_ctx->pipe_ctx[i].stream->writeback_info[0];
-
-   if (!res_ctx->pipe_ctx[i].stream)
-   continue;
-
-   /* Set writeback information */
-   pipes[pipe_cnt].dout.wb_enable = (wb_info->wb_enabled == true) 
? 1 : 0;
-   pipes[pipe_cnt].dout.num_active_wb++;
-   pipes[pipe_cnt].dout.wb.wb_src_height = 
wb_info->dwb_params.cnv_params.crop_height;
-   pipes[pipe_cnt].dout.wb.wb_src_width = 
wb_info->dwb_params.cnv_params.crop_width;
-   pipes[pipe_cnt].dout.wb.wb_dst_width = 
wb_info->dwb_params.dest_width;
-   pipes[pipe_cnt].dout.wb.wb_dst_height = 
wb_info->dwb_params.dest_height;
-   pipes[pipe_cnt].dout.wb.wb_htaps_luma = 1;
-   pipes[pipe_cnt].dout.wb.wb_vtaps_luma = 1;
-   pipes[pipe_cnt].dout.wb.wb_htaps_chroma = 
wb_info->dwb_params.scaler_taps.h_taps_c;
-   pipes[pipe_cnt].dout.wb.wb_vtaps_chroma = 
wb_info->dwb_params.scaler_taps.v_taps_c;
-   pipes[pipe_cnt].dout.wb.wb_hratio = 1.0;
-   pipes[pipe_cnt].dout.wb.wb_vratio = 1.0;
-   if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) {
-   if (wb_info->dwb_params.output_depth == 
DWB_OUTPUT_PIXEL_DEPTH_8BPC)
-   pipes[pipe_cnt].dout.wb.wb_pixel_format = 
dm_420_8;
-   else
-   pipes[pipe_cnt].dout.wb.wb_pixel_format = 
dm_420_10;
-   } else
-   pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_444_32;
-
-   pipe_cnt++;
-   }
-
-}
-
 int dcn20_populate_dml_pipes_from_context(
struct dc *dc,
struct dc_state *context,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
index c8f3127bbcdf..6ec8ff45f0f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
@@ -58,8 +58,6 @@ struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer(
struct dc_state *state,
const struct resource_pool *pool,
struct dc_stream_state *stream);
-void 

RE: [PATCH 2/2] drm/amdgpu: Revert "SWDEV-238407 Add clear vf fw support"

2021-03-31 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Ping ..

>-Original Message-
>From: Emily Deng 
>Sent: Wednesday, March 31, 2021 2:34 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: [PATCH 2/2] drm/amdgpu: Revert "SWDEV-238407 Add clear vf fw
>support"
>
>As the support has already been moved to the host driver, revert this in the guest driver.
>This reverts commit 8d5e6f45df5f9073760dea0ab94321615cea16ec.
>
>Signed-off-by: Emily Deng 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 36 ++---
>drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h |  8 --
> 2 files changed, 2 insertions(+), 42 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
>index c36c8fca1f64..aa2f8fc4aac8 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
>@@ -291,9 +291,8 @@ psp_cmd_submit_buf(struct psp_context *psp,
> amdgpu_asic_invalidate_hdp(psp->adev, NULL);
> }
>
>-/* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and
>PSP_ERR_UNKNOWN_COMMAND in SRIOV */
>-skip_unsupport = (psp->cmd_buf_mem->resp.status ==
>TEE_ERROR_NOT_SUPPORTED ||
>-psp->cmd_buf_mem->resp.status ==
>PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
>+/* We allow TEE_ERROR_NOT_SUPPORTED for VMR command in
>SRIOV */
>+skip_unsupport = (psp->cmd_buf_mem->resp.status == 0x000a)
>&&
>+amdgpu_sriov_vf(psp->adev);
>
> memcpy((void*)>resp, (void*)>cmd_buf_mem->resp,
>sizeof(struct psp_gfx_resp));
>
>@@ -420,26 +419,6 @@ static int psp_tmr_init(struct psp_context *psp)
> return ret;
> }
>
>-static int psp_clear_vf_fw(struct psp_context *psp) -{
>-int ret;
>-struct psp_gfx_cmd_resp *cmd;
>-
>-if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type !=
>CHIP_NAVI12)
>-return 0;
>-
>-cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
>-if (!cmd)
>-return -ENOMEM;
>-
>-cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
>-
>-ret = psp_cmd_submit_buf(psp, NULL, cmd, psp-
>>fence_buf_mc_addr);
>-kfree(cmd);
>-
>-return ret;
>-}
>-
> static bool psp_skip_tmr(struct psp_context *psp)  {
> switch (psp->adev->asic_type) {
>@@ -1924,12 +1903,6 @@ static int psp_hw_start(struct psp_context *psp)
> return ret;
> }
>
>-ret = psp_clear_vf_fw(psp);
>-if (ret) {
>-DRM_ERROR("PSP clear vf fw!\n");
>-return ret;
>-}
>-
> ret = psp_boot_config_set(adev);
> if (ret) {
> DRM_WARN("PSP set boot config@\n");
>@@ -2448,11 +2421,6 @@ static int psp_hw_fini(void *handle)
> }
>
> psp_asd_unload(psp);
>-ret = psp_clear_vf_fw(psp);
>-if (ret) {
>-DRM_ERROR("PSP clear vf fw!\n");
>-return ret;
>-}
>
> psp_tmr_terminate(psp);
> psp_ring_destroy(psp, PSP_RING_TYPE__KM); diff --git
>a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
>b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
>index dd4d65f7e0f0..b5b1feaa259e 100644
>--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
>+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
>@@ -97,7 +97,6 @@ enum psp_gfx_cmd_id
> GFX_CMD_ID_SETUP_VMR  = 0x0009,   /* setup VMR region */
> GFX_CMD_ID_DESTROY_VMR= 0x000A,   /* destroy VMR region
>*/
> GFX_CMD_ID_PROG_REG   = 0x000B,   /* program regs */
>-GFX_CMD_ID_CLEAR_VF_FW= 0x000D,   /* Clear VF FW, to be
>used on VF shutdown. */
> GFX_CMD_ID_GET_FW_ATTESTATION = 0x000F,   /* Query GPUVA of
>the Fw Attestation DB */
> /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20)
>*/
> GFX_CMD_ID_LOAD_TOC   = 0x0020,   /* Load TOC and obtain
>TMR size */
>@@ -401,11 +400,4 @@ struct psp_gfx_rb_frame
> /* total 64 bytes */
> };
>
>-#define PSP_ERR_UNKNOWN_COMMAND 0x0100
>-
>-enum tee_error_code {
>-TEE_SUCCESS = 0x,
>-TEE_ERROR_NOT_SUPPORTED = 0x000A,
>-};
>-
> #endif /* _PSP_TEE_GFX_IF_H_ */
>--
>2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov

2021-03-31 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Ping ..

>-Original Message-
>From: Emily Deng 
>Sent: Tuesday, March 30, 2021 5:43 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: [PATCH] drm/amdgpu: Toggle msix after FLR for sriov
>
>From: "Emily.Deng" 
>
>For a VF assigned to a guest VM, the MSI-X table will be reset after FLR.
>As the FLR is done by the host driver, QEMU and the vfio driver don't know
>this, and MSI-X is still enabled from the QEMU and vfio driver side.
>So if we want to re-set up the MSI-X table, we first need to disable and
>re-enable MSI-X from the guest VM side, or QEMU will do nothing as it
>thinks MSI-X is already enabled.
>
>v2:
>Change name with amdgpu_irq prefix, remove #ifdef.
>
>Signed-off-by: Emily.Deng 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 14 ++
> 1 file changed, 14 insertions(+)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>index 03412543427a..3045f52e613d 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
>@@ -277,6 +277,17 @@ static bool amdgpu_msi_ok(struct amdgpu_device
>*adev)
> return true;
> }
>
>+static void amdgpu_irq_restore_msix(struct amdgpu_device *adev) {
>+u16 ctrl;
>+
>+pci_read_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, );
>+ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
>+pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>PCI_MSIX_FLAGS, ctrl);
>+ctrl |= PCI_MSIX_FLAGS_ENABLE;
>+pci_write_config_word(adev->pdev, adev->pdev->msix_cap +
>+PCI_MSIX_FLAGS, ctrl); }
>+
> /**
>  * amdgpu_irq_init - initialize interrupt handling
>  *
>@@ -558,6 +569,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct
>amdgpu_device *adev)  {
> int i, j, k;
>
>+if (amdgpu_sriov_vf(adev))
>+amdgpu_irq_restore_msix(adev);
>+
> for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
> if (!adev->irq.client[i].sources)
> continue;
>--
>2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 4/6] drm/amdgpu: Disable fetch discovery data from vram for navi12 sriov

2021-03-31 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Ping .

>-Original Message-
>From: Emily Deng 
>Sent: Tuesday, March 30, 2021 12:42 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: [PATCH 4/6] drm/amdgpu: Disable fetch discovery data from vram for
>navi12 sriov
>
>To fix the board disappear issue.
>
>Signed-off-by: Emily Deng 
>---
> drivers/gpu/drm/amd/amdgpu/nv.c | 4 
> 1 file changed, 4 insertions(+)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c
>b/drivers/gpu/drm/amd/amdgpu/nv.c index 46d4bbabce75..48dc171bc759
>100644
>--- a/drivers/gpu/drm/amd/amdgpu/nv.c
>+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
>@@ -693,6 +693,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
> adev->nbio.funcs = _v2_3_funcs;
> adev->nbio.hdp_flush_reg = _v2_3_hdp_flush_reg;
> }
>+
>+if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_NAVI12)
>+amdgpu_discovery = 0;
>+
> adev->hdp.funcs = _v5_0_funcs;
>
> if (adev->asic_type >= CHIP_SIENNA_CICHLID)
>--
>2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/6] drm/amdgpu: Correct the irq numbers for virtual ctrc

2021-03-31 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Ping..

>-Original Message-
>From: Emily Deng 
>Sent: Tuesday, March 30, 2021 12:42 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily 
>Subject: [PATCH 2/6] drm/amdgpu: Correct the irq numbers for virtual ctrc
>
>Set the num_types equal to the enabled num_crtc.
>
>Signed-off-by: Emily Deng 
>---
> drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>index 5c11144da051..c03a83a2b7cd 100644
>--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>@@ -768,7 +768,7 @@ static const struct amdgpu_irq_src_funcs
>dce_virtual_crtc_irq_funcs = {
>
> static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev)  {
>-adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1;
>+adev->crtc_irq.num_types = adev->mode_info.num_crtc;
> adev->crtc_irq.funcs = _virtual_crtc_irq_funcs;  }
>
>--
>2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 1/6] drm/amdgpu: Disable vcn decode ring for sriov navi12

2021-03-31 Thread Deng, Emily
[AMD Official Use Only - Internal Distribution Only]

Ping..

>-Original Message-
>From: Emily Deng 
>Sent: Tuesday, March 30, 2021 12:42 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily ; Min, Frank 
>Subject: [PATCH 1/6] drm/amdgpu: Disable vcn decode ring for sriov navi12
>
>Since vcn decoding ring is not required, so just disable it.
>
>Signed-off-by: Frank.Min 
>Signed-off-by: Emily Deng 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  4 +++-
> drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   | 29 -
> 2 files changed, 17 insertions(+), 16 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>index 8844f650b17f..5d5c41c9d5aa 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>@@ -427,7 +427,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>*adev,
> if (adev->uvd.harvest_config & (1 << i))
> continue;
>
>-if (adev->vcn.inst[i].ring_dec.sched.ready)
>+if (adev->vcn.inst[i].ring_dec.sched.ready ||
>+(adev->asic_type == CHIP_NAVI12 &&
>+amdgpu_sriov_vf(adev)))
> ++num_rings;
> }
> ib_start_alignment = 16;
>diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
>b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
>index 116b9643d5ba..e4b61f3a45fb 100644
>--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
>+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
>@@ -220,21 +220,20 @@ static int vcn_v2_0_hw_init(void *handle)  {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> struct amdgpu_ring *ring = >vcn.inst->ring_dec;
>-int i, r;
>+int i, r = -1;
>
> adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
>  ring->doorbell_index, 0);
>
>-if (amdgpu_sriov_vf(adev))
>+if (amdgpu_sriov_vf(adev)) {
> vcn_v2_0_start_sriov(adev);
>-
>-r = amdgpu_ring_test_helper(ring);
>-if (r)
>-goto done;
>-
>-//Disable vcn decode for sriov
>-if (amdgpu_sriov_vf(adev))
>-ring->sched.ready = false;
>+if (adev->asic_type == CHIP_NAVI12)
>+ring->sched.ready = false;
>+} else {
>+r = amdgpu_ring_test_helper(ring);
>+if (r)
>+goto done;
>+}
>
> for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
> ring = >vcn.inst->ring_enc[i];
>@@ -245,8 +244,11 @@ static int vcn_v2_0_hw_init(void *handle)
>
> done:
> if (!r)
>-DRM_INFO("VCN decode and encode initialized
>successfully(under %s).\n",
>-(adev->pg_flags &
>AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
>+DRM_INFO("VCN %s encode initialized
>successfully(under %s).\n",
>+(adev->asic_type == CHIP_NAVI12 &&
>+amdgpu_sriov_vf(adev))?"":"decode and",
>+(adev->pg_flags &
>+AMD_PG_SUPPORT_VCN_DPG)?"DPG
>Mode":"SPG Mode");
>
> return r;
> }
>@@ -1719,9 +1721,6 @@ int vcn_v2_0_dec_ring_test_ring(struct
>amdgpu_ring *ring)
> unsigned i;
> int r;
>
>-if (amdgpu_sriov_vf(adev))
>-return 0;
>-
> WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
> r = amdgpu_ring_alloc(ring, 4);
> if (r)
>--
>2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] [v2] amd: display: dc: struct dc_state is declared twice

2021-03-31 Thread Wan Jiabing
struct dc_state is declared twice. One declaration already
appears at line 273. Remove the duplicate.

Signed-off-by: Wan Jiabing 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index 18ed0d3f247e..8ccf4bcd76b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -581,7 +581,6 @@ struct dc_bounding_box_overrides {
int min_dcfclk_mhz;
 };
 
-struct dc_state;
 struct resource_pool;
 struct dce_hwseq;
 struct gpu_info_soc_bounding_box_v1_0;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Alberto Salvia Novella
24fps is intended for video only. Anything interactive at 24fps, as just
moving the mouse around, is extremely choppy.

No way anyone would prefer that over an interlaced resolution or a lower
resolution. That is, by far, the worst option.

Just try it on your screen, set it to 24Hz or alike, and tell me your
experience. You can't even tell where the mouse is going to go.

On Wed, 31 Mar 2021 at 08:44, Christian König 
wrote:

> Hi Alberto,
>
> well a frame rate of 24Hz is perfectly reasonable for a TV and desktop
> usage.
>
> This is probably caused by the TVs limited HDMI bandwidth and a refresh
> rate of 30/25 Hz for the interlaced mode isn't much better either.
>
> Regards,
> Christian.
>
> Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:
>
> The frame-rate at 24Hz is extremely poor for normal desktop usage.
>
> If the highest resolution, aka 1080p, uses that refresh rate then the
> desktop will default to that frame-rate.
>
> Other progressive modes don't exhibit any issue.
>
> On Tue, 30 Mar 2021 at 18:26, Christian König 
> wrote:
>
>> Hi Alberto,
>>
>> I think the driver should only support resolutions that are *progressive*,
>> but also at least of *50Hz*.
>>
>>
>> Why do you think so?, the 24Hz resolution seems to be the native one of
>> the display.
>>
>> Regards,
>> Christian.
>>
>> Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:
>>
>> This is why I'm using interlaced:
>>
>> $ *xrandr*
>> Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
>> DisplayPort-0 disconnected (normal left inverted right x axis y axis)
>> HDMI-0 connected primary 1920x1080+0+0 (normal left inverted right x axis
>> y axis) 16mm x 9mm
>>1920x*1080i*60.00*+  50.0059.94
>>1920x1080 *24.00*23.98
>>1280x*720*  60.0050.0059.94
>>1024x768  75.0370.0760.00
>>832x624   74.55
>>800x600   72.1975.0060.3256.25
>>720x576   50.00
>>720x576i  50.00
>>720x480   60.0059.94
>>720x480i  60.0059.94
>>640x480   75.0072.8166.6760.0059.94
>>720x400   70.08
>> DVI-0 disconnected (normal left inverted right x axis y axis)
>>
>> I think the driver should only support resolutions that are *progressive*,
>> but also at least of *50Hz*.
>>
>> On Tue, 30 Mar 2021 at 15:41, Christian König <
>> ckoenig.leichtzumer...@gmail.com> wrote:
>>
>>> Mhm, no idea why an interlaced resolution would cause a crash. Maybe
>>> some miscalculation in the display code.
>>>
>>> But apart from that if you just connected your PC to a TV I also
>>> wouldn't recommend using an interlaced resolution in the first place.
>>>
>>> See those resolutions only exists for backward compatibility with analog
>>> hardware.
>>>
>>> I think we would just disable those modes instead of searching for the
>>> bug.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:
>>>
>>> I guessed so.
>>>
>>> The GPU is a Radeon HD5870, and the screen is an old Telefunken TV
>>> (TLFK22LEDPVR1).
>>>
>>> Since my real display got into repair I used this TV meanwhile, and to
>>> my surprise it froze the system.
>>>
>>> On Tue, 30 Mar 2021 at 10:15, Christian König 
>>> wrote:
>>>
 Hi Alberto,

 well what hardware do you have?

 Interlaced resolutions are not used any more on modern hardware, so
 they
 are not well tested.

 Regards,
 Christian.

 Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
 > The entire desktop hangs after some minutes when using the module
 > "radeon" with an interlaced resolution.
 >
 > Easier to trigger by playing a video on Firefox, at least on
 kwin_x11.
 > Wayland didn't exhibit the problem.
 >
 > Other display drivers, from different computers I have tried, didn't
 > allow those interlaced resolutions all together. It seems they know
 > there will be problems.


>>> ___
>>> amd-gfx mailing 
>>> listamd-gfx@lists.freedesktop.orghttps://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>  
>>> 
>>>
>>>
>>>
>>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Christian König
Can you access the system over the network and see if there is anything 
in the system log?


It sounds like the display stack has crashed, but since the sound keeps 
playing the system is most likely still responsive over the network.


Thanks,
Christian.

Am 31.03.21 um 10:05 schrieb Alberto Salvia Novella:
What happens is this simple: after a few minutes, about 6 or so, the 
entire content of the screen stays still. In some minor situations 
only the applications panel of KDE Plasma.


If music is playing it continues playing, so only graphics are hung. 
Yet in most cases the power button won't shut down the computer, as it 
usually does.


At least this is the case using kwin on x11, and not on wayland. It 
only happens on "radeon" and not on Intel or "radeonhd".


On Wed, 31 Mar 2021 at 09:48, Christian König 
mailto:christian.koe...@amd.com>> wrote:


Correct, but a TV is intended for videos only. That's why it
implements only the lower HDMI standard.

Interlaced transmits only half the lines with each frame, so a
60Hz mode effectively either becomes a 30Hz mode, halving the
vertical resolution, or is adaptive motion compensated, with the
known visual artifacts, depending on what the deinterlacing
setting on your TV is.

You could just add a progressive 1920x540@60 or 1920x1080@30 mode
manually and would have the same effect with probably better
quality. See https://de.wikipedia.org/wiki/Deinterlacing


for reference.

If you can give us some more information what is happening when
the system freeze we could try to narrow this down, but we can't
spend much time on a very specific use case in a driver which is
in maintenance mode.

Regards,
Christian.

Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:

24fps is intended for video only. Anything interactive at 24fps,
as just moving the mouse around, is extremely choppy.

No way anyone would prefer that over an interlaced resolution or
a lower resolution. That is, by far, the worst option.

Just try it on your screen, set it to 24Hz or alike, and tell me
your experience. You can't even tell where the mouse is going to go.

On Wed, 31 Mar 2021 at 08:44, Christian König
mailto:christian.koe...@amd.com>> wrote:

Hi Alberto,

well a frame rate of 24Hz is perfectly reasonable for a TV
and desktop usage.

This is probably caused by the TVs limited HDMI bandwidth and
a refresh rate of 30/25 Hz for the interlaced mode isn't much
better either.

Regards,
Christian.

Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:

The frame-rate at 24Hz is extremely poor for normal desktop
usage.

If the highest resolution, aka 1080p, uses that refresh rate
then the desktop will default to that frame-rate.

Other progressive modes don't exhibit any issue.

On Tue, 30 Mar 2021 at 18:26, Christian König
mailto:christian.koe...@amd.com>>
wrote:

Hi Alberto,


I think the driver should only support resolutions that
are *progressive*, but also at least of *50Hz*.


Why do you think so?, the 24Hz resolution seems to be
the native one of the display.

Regards,
Christian.

Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:

This is why I'm using interlaced:

$ *xrandr*
Screen 0: minimum 320 x 200, current 1920 x 1080,
maximum 8192 x 8192
DisplayPort-0 disconnected (normal left inverted right
x axis y axis)
HDMI-0 connected primary 1920x1080+0+0 (normal left
inverted right x axis y axis) 16mm x 9mm
   1920x*1080i*    60.00*+  50.00    59.94
   1920x1080 *24.00*  23.98
   1280x*720*      60.00    50.00    59.94
   1024x768      75.03    70.07  60.00
   832x624       74.55
   800x600       72.19    75.00  60.32    56.25
   720x576       50.00
   720x576i      50.00
   720x480       60.00    59.94
   720x480i      60.00    59.94
   640x480       75.00    72.81  66.67    60.00    59.94
   720x400       70.08
DVI-0 disconnected (normal left inverted right x axis y
axis)

I think the driver should only support resolutions that
are *progressive*, but also at least of *50Hz*.

On 

Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Christian König
Correct, but a TV is intended for videos only. That's why it implements 
only the lower HDMI standard.


Interlaced transmits only half the lines with each frame, so a 60Hz 
mode effectively either becomes a 30Hz mode, halving the vertical 
resolution, or is adaptive motion compensated, with the known visual 
artifacts, depending on what the deinterlacing setting on your TV is.


You could just add a progressive 1920x540@60 or 1920x1080@30 mode 
manually and would have the same effect with probably better quality. 
See https://de.wikipedia.org/wiki/Deinterlacing for reference.


If you can give us some more information what is happening when the 
system freeze we could try to narrow this down, but we can't spend much 
time on a very specific use case in a driver which is in maintenance mode.


Regards,
Christian.

Am 31.03.21 um 09:21 schrieb Alberto Salvia Novella:
24fps is intended for video only. Anything interactive at 24fps, as 
just moving the mouse around, is extremely choppy.


No way anyone would prefer that over an interlaced resolution or a 
lower resolution. That is, by far, the worst option.


Just try it on your screen, set it to 24Hz or alike, and tell me your 
experience. You can't even tell where the mouse is going to go.


On Wed, 31 Mar 2021 at 08:44, Christian König 
mailto:christian.koe...@amd.com>> wrote:


Hi Alberto,

well a frame rate of 24Hz is perfectly reasonable for a TV and
desktop usage.

This is probably caused by the TVs limited HDMI bandwidth and a
refresh rate of 30/25 Hz for the interlaced mode isn't much better
either.

Regards,
Christian.

Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:

The frame-rate at 24Hz is extremely poor for normal desktop usage.

If the highest resolution, aka 1080p, uses that refresh rate then
the desktop will default to that frame-rate.

Other progressive modes don't exhibit any issue.

On Tue, 30 Mar 2021 at 18:26, Christian König
mailto:christian.koe...@amd.com>> wrote:

Hi Alberto,


I think the driver should only support resolutions that are
*progressive*, but also at least of *50Hz*.


Why do you think so?, the 24Hz resolution seems to be the
native one of the display.

Regards,
Christian.

Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:

This is why I'm using interlaced:

$ *xrandr*
Screen 0: minimum 320 x 200, current 1920 x 1080, maximum
8192 x 8192
DisplayPort-0 disconnected (normal left inverted right x
axis y axis)
HDMI-0 connected primary 1920x1080+0+0 (normal left inverted
right x axis y axis) 16mm x 9mm
   1920x*1080i*    60.00*+  50.00  59.94
   1920x1080 *24.00*    23.98
   1280x*720*      60.00    50.00  59.94
   1024x768      75.03    70.07    60.00
   832x624       74.55
   800x600       72.19    75.00    60.32  56.25
   720x576       50.00
   720x576i      50.00
   720x480       60.00    59.94
   720x480i      60.00    59.94
   640x480       75.00    72.81    66.67  60.00    59.94
   720x400       70.08
DVI-0 disconnected (normal left inverted right x axis y axis)

I think the driver should only support resolutions that are
*progressive*, but also at least of *50Hz*.

On Tue, 30 Mar 2021 at 15:41, Christian König
mailto:ckoenig.leichtzumer...@gmail.com>> wrote:

Mhm, no idea why an interlaced resolution would cause a
crash. Maybe some miscalculation in the display code.

But apart from that if you just connected your PC to a
TV I also wouldn't recommend using an interlaced
resolution in the first place.

See those resolutions only exists for backward
compatibility with analog hardware.

I think we would just disable those modes instead of
searching for the bug.

Regards,
Christian.

Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:

I guessed so.

The GPU is a Radeon HD5870, and the screen is an old
Telefunken TV (TLFK22LEDPVR1).

Since my real display got into repair I used this TV
meanwhile, and to my surprise it froze the system.

On Tue, 30 Mar 2021 at 10:15, Christian König
mailto:christian.koe...@amd.com>> wrote:

Hi Alberto,

well what hardware do you have?

Interlaced resolutions are not used any more on
modern hardware, so they
are not well tested.

Regards,
Christian.

Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
> The entire desktop hangs after some minutes when
using the module
> 

[PATCH] drm/amdgpu: change mmhub register access from mmio to rlcg

2021-03-31 Thread Peng Ju Zhou
From: pengzhou 

change mmhub register access from mmio to rlcg if mmhub
indirect access enabled, otherwise access these registers in
the old path.

Signed-off-by: pengzhou 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 12 ++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 39 +
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 33e54eed2eec..7d0ce34dcc45 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -38,6 +38,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "soc15_common.h"
+#include "gc/gc_10_1_0_offset.h"
 
 #include "nbio_v2_3.h"
 
@@ -253,7 +254,10 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM 
flush!\n");
}
 
-   WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+   if (vmhub == AMDGPU_MMHUB_0)
+   WREG32_RLC_NO_KIQ((hub->vm_inv_eng0_req + eng), inv_req);
+   else
+   WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
 
/*
 * Issue a dummy read to wait for the ACK register to be cleared
@@ -280,8 +284,10 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
 * add semaphore release after invalidation,
 * write with 0 means semaphore release
 */
-   WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0);
+   if (vmhub == AMDGPU_MMHUB_0)
+   WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+   else
+   WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
 
spin_unlock(>gmc.invalidate_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index f107385faba2..9dae6eb47053 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -29,6 +29,7 @@
 #include "mmhub/mmhub_2_0_0_default.h"
 #include "navi10_enum.h"
 
+#include "gc/gc_10_1_0_offset.h"
 #include "soc15_common.h"
 
 #define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid  0x064d
@@ -165,11 +166,11 @@ static void mmhub_v2_0_setup_vm_pt_regs(struct 
amdgpu_device *adev, uint32_t vmi
 {
struct amdgpu_vmhub *hub = >vmhub[AMDGPU_MMHUB_0];
 
-   WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+   WREG32_SOC15_OFFSET_RLC(MMHUB, 0, 
mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
 
-   WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+   WREG32_SOC15_OFFSET_RLC(MMHUB, 0, 
mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
 }
@@ -180,14 +181,14 @@ static void mmhub_v2_0_init_gart_aperture_regs(struct 
amdgpu_device *adev)
 
mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
 
-   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 (u32)(adev->gmc.gart_start >> 12));
-   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
 (u32)(adev->gmc.gart_start >> 44));
 
-   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
 (u32)(adev->gmc.gart_end >> 12));
-   WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+   WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
 (u32)(adev->gmc.gart_end >> 44));
 }
 
@@ -197,9 +198,9 @@ static void mmhub_v2_0_init_system_aperture_regs(struct 
amdgpu_device *adev)
uint32_t tmp;
 
/* Program the AGP BAR */
-   WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
-   WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
-   WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
+   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 
24);
+   WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
 
if (!amdgpu_sriov_vf(adev)) {
/* Program the system aperture low logical page number. */
@@ -304,12 +305,12 @@ static void mmhub_v2_0_enable_system_domain(struct 
amdgpu_device *adev)
 {
uint32_t tmp;
 
-   tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
+   tmp = 

Re: Interlaced resolutions hang the desktop

2021-03-31 Thread Christian König

Hi Alberto,

well a frame rate of 24Hz is perfectly reasonable for a TV and desktop 
usage.


This is probably caused by the TVs limited HDMI bandwidth and a refresh 
rate of 30/25 Hz for the interlaced mode isn't much better either.


Regards,
Christian.

Am 30.03.21 um 22:59 schrieb Alberto Salvia Novella:

The frame-rate at 24Hz is extremely poor for normal desktop usage.

If the highest resolution, aka 1080p, uses that refresh rate then the 
desktop will default to that frame-rate.


Other progressive modes don't exhibit any issue.

On Tue, 30 Mar 2021 at 18:26, Christian König 
mailto:christian.koe...@amd.com>> wrote:


Hi Alberto,


I think the driver should only support resolutions that are
*progressive*, but also at least of *50Hz*.


Why do you think so?, the 24Hz resolution seems to be the native
one of the display.

Regards,
Christian.

Am 30.03.21 um 17:37 schrieb Alberto Salvia Novella:

This is why I'm using interlaced:

$ *xrandr*
Screen 0: minimum 320 x 200, current 1920 x 1080, maximum 8192 x 8192
DisplayPort-0 disconnected (normal left inverted right x axis y axis)
HDMI-0 connected primary 1920x1080+0+0 (normal left inverted
right x axis y axis) 16mm x 9mm
   1920x*1080i*    60.00*+  50.00    59.94
   1920x1080 *24.00*    23.98
   1280x*720*      60.00    50.00    59.94
   1024x768      75.03    70.07    60.00
   832x624       74.55
   800x600       72.19    75.00    60.32    56.25
   720x576       50.00
   720x576i      50.00
   720x480       60.00    59.94
   720x480i      60.00    59.94
   640x480       75.00    72.81    66.67    60.00  59.94
   720x400       70.08
DVI-0 disconnected (normal left inverted right x axis y axis)

I think the driver should only support resolutions that are
*progressive*, but also at least of *50Hz*.

On Tue, 30 Mar 2021 at 15:41, Christian König
mailto:ckoenig.leichtzumer...@gmail.com>> wrote:

Mhm, no idea why an interlaced resolution would cause a
crash. Maybe some miscalculation in the display code.

But apart from that if you just connected your PC to a TV I
also wouldn't recommend using an interlaced resolution in the
first place.

See those resolutions only exists for backward compatibility
with analog hardware.

I think we would just disable those modes instead of
searching for the bug.

Regards,
Christian.

Am 30.03.21 um 11:07 schrieb Alberto Salvia Novella:

I guessed so.

The GPU is a Radeon HD5870, and the screen is an old
Telefunken TV (TLFK22LEDPVR1).

Since my real display got into repair I used this TV
meanwhile, and to my surprise it froze the system.

On Tue, 30 Mar 2021 at 10:15, Christian König
mailto:christian.koe...@amd.com>>
wrote:

Hi Alberto,

well what hardware do you have?

Interlaced resolutions are not used any more on modern
hardware, so they
are not well tested.

Regards,
Christian.

Am 30.03.21 um 10:04 schrieb Alberto Salvia Novella:
> The entire desktop hangs after some minutes when using
the module
> "radeon" with an interlaced resolution.
>
> Easier to trigger by playing a video on Firefox, at
least on kwin_x11.
> Wayland didn't exhibit the problem.
>
> Other display drivers, from different computers I have
tried, didn't
> allow those interlaced resolutions all together. It
seems they know
> there will be problems.


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org  
https://lists.freedesktop.org/mailman/listinfo/amd-gfx  







___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/4] drm/amdgpu: Macros for vram physical addr calculation

2021-03-31 Thread Christian König

Hi Oak,

have you seen my review comments on the patches?

Regards,
Christian.

Am 30.03.21 um 21:35 schrieb Oak Zeng:

Add one macro to calculate BO's GPU physical address.
And another one to calculate BO's CPU physical address.

Signed-off-by: Oak Zeng 
Suggested-by: Lijo Lazar 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 7e248a4..b244298 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -272,6 +272,9 @@ struct amdgpu_gmc {
  #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) 
(adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
  #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) 
(adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
  #define amdgpu_gmc_get_vbios_fb_size(adev) 
(adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_gpu_va2pa(adev, va) (va - (adev)->gmc.vram_start + 
(adev)->vm_manager.vram_base_offset)
+#define amdgpu_gmc_gpu_pa(adev, bo) amdgpu_gmc_gpu_va2pa(adev, 
amdgpu_bo_gpu_offset(bo))
+#define amdgpu_gmc_cpu_pa(adev, bo) (amdgpu_bo_gpu_offset(bo) - 
(adev)->gmc.vram_start + (adev)->gmc.aper_base)
  
  /**

   * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the 
BAR


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: Revert "SWDEV-238407 Add clear vf fw support"

2021-03-31 Thread Emily Deng
As the support has already been moved to the host driver, revert this
in the guest driver.
This reverts commit 8d5e6f45df5f9073760dea0ab94321615cea16ec.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 36 ++---
 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h |  8 --
 2 files changed, 2 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c36c8fca1f64..aa2f8fc4aac8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -291,9 +291,8 @@ psp_cmd_submit_buf(struct psp_context *psp,
amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
 
-   /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and 
PSP_ERR_UNKNOWN_COMMAND in SRIOV */
-   skip_unsupport = (psp->cmd_buf_mem->resp.status == 
TEE_ERROR_NOT_SUPPORTED ||
-   psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && 
amdgpu_sriov_vf(psp->adev);
+   /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command in SRIOV */
+   skip_unsupport = (psp->cmd_buf_mem->resp.status == 0x000a) && 
amdgpu_sriov_vf(psp->adev);
 
memcpy((void*)>resp, (void*)>cmd_buf_mem->resp, sizeof(struct 
psp_gfx_resp));
 
@@ -420,26 +419,6 @@ static int psp_tmr_init(struct psp_context *psp)
return ret;
 }
 
-static int psp_clear_vf_fw(struct psp_context *psp)
-{
-   int ret;
-   struct psp_gfx_cmd_resp *cmd;
-
-   if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
-   return 0;
-
-   cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-   if (!cmd)
-   return -ENOMEM;
-
-   cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
-
-   ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-   kfree(cmd);
-
-   return ret;
-}
-
 static bool psp_skip_tmr(struct psp_context *psp)
 {
switch (psp->adev->asic_type) {
@@ -1924,12 +1903,6 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
 
-   ret = psp_clear_vf_fw(psp);
-   if (ret) {
-   DRM_ERROR("PSP clear vf fw!\n");
-   return ret;
-   }
-
ret = psp_boot_config_set(adev);
if (ret) {
DRM_WARN("PSP set boot config@\n");
@@ -2448,11 +2421,6 @@ static int psp_hw_fini(void *handle)
}
 
psp_asd_unload(psp);
-   ret = psp_clear_vf_fw(psp);
-   if (ret) {
-   DRM_ERROR("PSP clear vf fw!\n");
-   return ret;
-   }
 
psp_tmr_terminate(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 
b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index dd4d65f7e0f0..b5b1feaa259e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -97,7 +97,6 @@ enum psp_gfx_cmd_id
 GFX_CMD_ID_SETUP_VMR  = 0x0009,   /* setup VMR region */
 GFX_CMD_ID_DESTROY_VMR= 0x000A,   /* destroy VMR region */
 GFX_CMD_ID_PROG_REG   = 0x000B,   /* program regs */
-GFX_CMD_ID_CLEAR_VF_FW= 0x000D,   /* Clear VF FW, to be used 
on VF shutdown. */
 GFX_CMD_ID_GET_FW_ATTESTATION = 0x000F,   /* Query GPUVA of the Fw 
Attestation DB */
 /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
 GFX_CMD_ID_LOAD_TOC   = 0x0020,   /* Load TOC and obtain TMR 
size */
@@ -401,11 +400,4 @@ struct psp_gfx_rb_frame
 /* total 64 bytes */
 };
 
-#define PSP_ERR_UNKNOWN_COMMAND 0x0100
-
-enum tee_error_code {
-TEE_SUCCESS = 0x,
-TEE_ERROR_NOT_SUPPORTED = 0x000A,
-};
-
 #endif /* _PSP_TEE_GFX_IF_H_ */
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: Revert "SWDEV-238407 drm/amdgpu/sriov: Need to clear kiq position"

2021-03-31 Thread Emily Deng
As the implementation has already been moved to the host driver, remove this
from the guest driver.
This reverts commit 96f7d59858ada4a6372fcb249b04805d14482c49.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index b4fd0394cd08..7e012fa1a3f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7237,7 +7237,6 @@ static int gfx_v10_0_hw_fini(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
-   uint32_t tmp;
 
amdgpu_irq_put(adev, >gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, >gfx.priv_inst_irq, 0);
@@ -7256,11 +7255,6 @@ static int gfx_v10_0_hw_fini(void *handle)
 
if (amdgpu_sriov_vf(adev)) {
gfx_v10_0_cp_gfx_enable(adev, false);
-   /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
-   tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
-   tmp &= 0xff00;
-   WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
-
return 0;
}
gfx_v10_0_cp_enable(adev, false);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx