Re: [PATCH v2 09/20] drm/i915: Remove references to struct drm_device.pdev

2020-12-07 Thread Thomas Zimmermann

ping for a review of the i915 patches

Am 01.12.20 um 11:35 schrieb Thomas Zimmermann:

Using struct drm_device.pdev is deprecated. Convert i915 to struct
drm_device.dev. No functional changes.

v2:
* move gt/ and gvt/ changes into separate patches
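
For reference, every hunk below applies the same one-line substitution; a
minimal sketch of the pattern (the helper name is illustrative only):

static inline struct pci_dev *
i915_to_pdev(struct drm_i915_private *dev_priv)
{
	/* was: dev_priv->drm.pdev (deprecated) */
	return to_pci_dev(dev_priv->drm.dev);
}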

Signed-off-by: Thomas Zimmermann 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
  drivers/gpu/drm/i915/display/intel_bios.c |  2 +-
  drivers/gpu/drm/i915/display/intel_cdclk.c| 14 ++---
  drivers/gpu/drm/i915/display/intel_csr.c  |  2 +-
  drivers/gpu/drm/i915/display/intel_dsi_vbt.c  |  2 +-
  drivers/gpu/drm/i915/display/intel_fbdev.c|  2 +-
  drivers/gpu/drm/i915/display/intel_gmbus.c|  2 +-
  .../gpu/drm/i915/display/intel_lpe_audio.c|  5 +++--
  drivers/gpu/drm/i915/display/intel_opregion.c |  6 +++---
  drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
  drivers/gpu/drm/i915/display/intel_panel.c|  4 ++--
  drivers/gpu/drm/i915/display/intel_quirks.c   |  2 +-
  drivers/gpu/drm/i915/display/intel_sdvo.c |  2 +-
  drivers/gpu/drm/i915/display/intel_vga.c  |  8 
  drivers/gpu/drm/i915/gem/i915_gem_phys.c  |  6 +++---
  drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  2 +-
  drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
  drivers/gpu/drm/i915/i915_drv.c   | 20 +--
  drivers/gpu/drm/i915/i915_drv.h   |  2 +-
  drivers/gpu/drm/i915/i915_gem_gtt.c   |  4 ++--
  drivers/gpu/drm/i915/i915_getparam.c  |  5 +++--
  drivers/gpu/drm/i915/i915_gpu_error.c |  2 +-
  drivers/gpu/drm/i915/i915_irq.c   |  6 +++---
  drivers/gpu/drm/i915/i915_pmu.c   |  5 +++--
  drivers/gpu/drm/i915/i915_suspend.c   |  4 ++--
  drivers/gpu/drm/i915/i915_switcheroo.c|  4 ++--
  drivers/gpu/drm/i915/i915_vgpu.c  |  2 +-
  drivers/gpu/drm/i915/intel_device_info.c  |  2 +-
  drivers/gpu/drm/i915/intel_region_lmem.c  |  8 
  drivers/gpu/drm/i915/intel_runtime_pm.c   |  2 +-
  drivers/gpu/drm/i915/intel_uncore.c   |  4 ++--
  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
  drivers/gpu/drm/i915/selftests/mock_gtt.c |  2 +-
  32 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bios.c 
b/drivers/gpu/drm/i915/display/intel_bios.c
index 4cc949b228f2..8879676372a3 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -2088,7 +2088,7 @@ bool intel_bios_is_valid_vbt(const void *buf, size_t size)
  
  static struct vbt_header *oprom_get_vbt(struct drm_i915_private *dev_priv)

  {
-   struct pci_dev *pdev = dev_priv->drm.pdev;
+   struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
void __iomem *p = NULL, *oprom;
struct vbt_header *vbt;
u16 vbt_size;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index c449d28d0560..a6e13208dc50 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -96,7 +96,7 @@ static void fixed_450mhz_get_cdclk(struct drm_i915_private 
*dev_priv,
  static void i85x_get_cdclk(struct drm_i915_private *dev_priv,
   struct intel_cdclk_config *cdclk_config)
  {
-   struct pci_dev *pdev = dev_priv->drm.pdev;
+   struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
u16 hpllcc = 0;
  
  	/*

@@ -138,7 +138,7 @@ static void i85x_get_cdclk(struct drm_i915_private 
*dev_priv,
  static void i915gm_get_cdclk(struct drm_i915_private *dev_priv,
 struct intel_cdclk_config *cdclk_config)
  {
-   struct pci_dev *pdev = dev_priv->drm.pdev;
+   struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
u16 gcfgc = 0;
  
 	pci_read_config_word(pdev, GCFGC, &gcfgc);

@@ -162,7 +162,7 @@ static void i915gm_get_cdclk(struct drm_i915_private 
*dev_priv,
  static void i945gm_get_cdclk(struct drm_i915_private *dev_priv,
 struct intel_cdclk_config *cdclk_config)
  {
-   struct pci_dev *pdev = dev_priv->drm.pdev;
+   struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
u16 gcfgc = 0;
  
 	pci_read_config_word(pdev, GCFGC, &gcfgc);

@@ -256,7 +256,7 @@ static unsigned int intel_hpll_vco(struct drm_i915_private 
*dev_priv)
  static void g33_get_cdclk(struct drm_i915_private *dev_priv,
  struct intel_cdclk_config *cdclk_config)
  {
-   struct pci_dev *pdev = dev_priv->drm.pdev;
+   struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
static const u8 div_3200[] = { 12, 10,  8,  7, 5, 16 };
static const u8 div_4000[] = { 14, 12, 10,  8, 6, 20 };
static const u8 div_4800[] = { 20, 14, 12, 10, 8, 24 };
@@ -305,7 +305,7 @@ static void g33_get_cdclk(struct drm_i915_private *dev_priv,
  static void pnv_get_cdclk(struct drm_i915_private *dev_priv,
  

RE: [PATCH] drm/amd/pm: update driver if version for dimgrey_cavefish

2020-12-07 Thread Chen, Jiansong (Simon)
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Jiansong Chen 

-Original Message-
From: Zhou1, Tao 
Sent: Monday, December 7, 2020 2:06 PM
To: Chen, Jiansong (Simon) ; Gui, Jack 
; Zhang, Hawking ; 
amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao 
Subject: [PATCH] drm/amd/pm: update driver if version for dimgrey_cavefish

Per PMFW 59.16.0.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h 
b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
index c1cb472f8f0f..e5aa0725147c 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
@@ -33,7 +33,7 @@
 #define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3B
 #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xC
 #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x02
-#define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xD
+#define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF

 /* MP Apertures */
 #define MP0_Public			0x0380
--
2.17.1



Re: [PATCH] drm/amdgpu: add the missing AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag

2020-12-07 Thread Christian König

Am 08.12.20 um 03:50 schrieb Chen Lei:

Add the AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag in amdgpu_gem_create_ioctl.

Signed-off-by: Chen Lei 


NAK, that is not missing but intentionally blocked.

Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7e8265da9..da18aeff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -233,6 +233,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
*data,
  AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
  AMDGPU_GEM_CREATE_VRAM_CLEARED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
  AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
  AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
  AMDGPU_GEM_CREATE_ENCRYPTED))




Recall: [PATCH 6/8] drm/amd/pm: add yellow_carp_ppt implementation(V3)

2020-12-07 Thread Liu, Aaron
Liu, Aaron would like to recall the message, "[PATCH 6/8] drm/amd/pm: add 
yellow_carp_ppt implementation(V3)".


Recall: [PATCH 5/8] drm/amd/pm: add smu13 ip support for moment(V3)

2020-12-07 Thread Liu, Aaron
Liu, Aaron would like to recall the message, "[PATCH 5/8] drm/amd/pm: add smu13 
ip support for moment(V3)".


[PATCH 6/8] drm/amd/pm: add yellow_carp_ppt implementation(V3)

2020-12-07 Thread Aaron Liu
yellow_carp_ppt is swsmu layer 2 code for yellow carp.
V2: rename smu_v13_0 to smu_v13_0_1
V3: cleanup code.

Signed-off-by: Aaron Liu 
Acked-by: Alex Deucher 
Reviewed-by: Huang Rui 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile   |   2 +-
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 166 ++
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h  |  28 +++
 3 files changed, 195 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
 create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile
index 61ac864ac948..2c7f4f0d5b7e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile
@@ -23,7 +23,7 @@
 # Makefile for the 'smu manager' sub-component of powerplay.
 # It provides the smu management services for the driver.
 
-SMU13_MGR = smu_v13_0_1.o
+SMU13_MGR = smu_v13_0_1.o yellow_carp_ppt.o
 
 AMD_SWSMU_SMU13MGR = $(addprefix $(AMD_SWSMU_PATH)/smu13/,$(SMU13_MGR))
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
new file mode 100644
index ..d6686c0e62e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#define SWSMU_CODE_LAYER_L2
+
+#include "amdgpu.h"
+#include "amdgpu_smu.h"
+#include "smu_v13_0_1.h"
+#include "smu13_driver_if_yellow_carp.h"
+#include "yellow_carp_ppt.h"
+#include "smu_v13_0_1_ppsmc.h"
+#include "smu_v13_0_1_pmfw.h"
+#include "smu_cmn.h"
+
+/*
+ * DO NOT use these for err/warn/info/debug messages.
+ * Use dev_err, dev_warn, dev_info and dev_dbg instead.
+ * They are more MGPU friendly.
+ */
+#undef pr_err
+#undef pr_warn
+#undef pr_info
+#undef pr_debug
+
+static struct cmn2asic_msg_mapping yellow_carp_message_map[SMU_MSG_MAX_COUNT] = {
+	MSG_MAP(TestMessage,			PPSMC_MSG_TestMessage,			1),
+	MSG_MAP(GetSmuVersion,			PPSMC_MSG_GetSmuVersion,		1),
+	MSG_MAP(GetDriverIfVersion,		PPSMC_MSG_GetDriverIfVersion,		1),
+	MSG_MAP(EnableGfxOff,			PPSMC_MSG_EnableGfxOff,			1),
+	MSG_MAP(DisableGfxOff,			PPSMC_MSG_DisableGfxOff,		1),
+	MSG_MAP(AllowGfxOff,			PPSMC_MSG_AllowGfxOff,			1),
+	MSG_MAP(DisallowGfxOff,			PPSMC_MSG_DisallowGfxOff,		1),
+	MSG_MAP(PowerDownVcn,			PPSMC_MSG_PowerDownVcn,			1),
+	MSG_MAP(PowerUpVcn,			PPSMC_MSG_PowerUpVcn,			1),
+	MSG_MAP(Spare,				PPSMC_MSG_SPARE0,			1),
+	MSG_MAP(SetHardMinVcn,			PPSMC_MSG_SetHardMinVcn,		1),
+	MSG_MAP(ActiveProcessNotify,		PPSMC_MSG_ActiveProcessNotify,		1),
+	MSG_MAP(SetDriverDramAddrHigh,		PPSMC_MSG_SetDriverDramAddrHigh,	1),
+	MSG_MAP(SetDriverDramAddrLow,		PPSMC_MSG_SetDriverDramAddrLow,		1),
+	MSG_MAP(TransferTableSmu2Dram,		PPSMC_MSG_TransferTableSmu2Dram,	1),
+	MSG_MAP(TransferTableDram2Smu,		PPSMC_MSG_TransferTableDram2Smu,	1),
+	MSG_MAP(GfxDeviceDriverReset,		PPSMC_MSG_GfxDeviceDriverReset,		1),
+	MSG_MAP(GetEnabledSmuFeatures,		PPSMC_MSG_GetEnabledSmuFeatures,	1),
+	MSG_MAP(Spare1,				PPSMC_MSG_SPARE1,			1),
+	MSG_MAP(SetHardMinSocclkByFreq,		PPSMC_MSG_SetHardMinSocclkByFreq,	1),
+	MSG_MAP(SetSoftMinVcn,

[PATCH 5/8] drm/amd/pm: add smu13 ip support for moment(V3)

2020-12-07 Thread Aaron Liu
To support yellow carp, we need to add smu13 ip
support for the moment.

V2: add smu_v13_0_1.c|h dedicated for apu.
V3: cleanup code.

Signed-off-by: Aaron Liu 
Acked-by: Alex Deucher 
Reviewed-by: Huang Rui 
---
 drivers/gpu/drm/amd/pm/Makefile   |   1 +
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |   1 +
 drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h  |  52 +++
 drivers/gpu/drm/amd/pm/swsmu/Makefile |   2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile   |  30 
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_1.c  | 131 ++
 6 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h
 create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile
 create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_1.c

diff --git a/drivers/gpu/drm/amd/pm/Makefile b/drivers/gpu/drm/amd/pm/Makefile
index f01e86030cd1..8cf6eff1ea93 100644
--- a/drivers/gpu/drm/amd/pm/Makefile
+++ b/drivers/gpu/drm/amd/pm/Makefile
@@ -27,6 +27,7 @@ subdir-ccflags-y += \
-I$(FULL_AMD_PATH)/pm/swsmu \
-I$(FULL_AMD_PATH)/pm/swsmu/smu11 \
-I$(FULL_AMD_PATH)/pm/swsmu/smu12 \
+   -I$(FULL_AMD_PATH)/pm/swsmu/smu13 \
-I$(FULL_AMD_PATH)/pm/powerplay \
-I$(FULL_AMD_PATH)/pm/powerplay/smumgr\
-I$(FULL_AMD_PATH)/pm/powerplay/hwmgr
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 9724d6fd82f4..7c49b046c6fa 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -706,6 +706,7 @@ extern const struct amd_ip_funcs smu_ip_funcs;
 
 extern const struct amdgpu_ip_block_version smu_v11_0_ip_block;
 extern const struct amdgpu_ip_block_version smu_v12_0_ip_block;
+extern const struct amdgpu_ip_block_version smu_v13_0_1_ip_block;
 
 bool is_support_sw_smu(struct amdgpu_device *adev);
 int smu_reset(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h 
b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h
new file mode 100644
index ..b2e9258dc06c
--- /dev/null
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMU_V13_0_1_H__
+#define __SMU_V13_0_1_H__
+
+#include "amdgpu_smu.h"
+
+#define SMU13_0_1_DRIVER_IF_VERSION_INV 0x
+#define SMU13_0_1_DRIVER_IF_VERSION_YELLOW_CARP 0x3
+
+/* MP Apertures */
+#define MP0_Public 0x0380
+#define MP0_SRAM   0x0390
+#define MP1_Public 0x03b0
+#define MP1_SRAM   0x03c4
+
+/* address block */
+#define smnMP1_FIRMWARE_FLAGS  0x3010024
+
+
+#if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3)
+
+int smu_v13_0_1_check_fw_status(struct smu_context *smu);
+
+int smu_v13_0_1_check_fw_version(struct smu_context *smu);
+
+int smu_v13_0_1_fini_smc_tables(struct smu_context *smu);
+
+int smu_v13_0_1_set_default_dpm_tables(struct smu_context *smu);
+
+#endif
+#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/Makefile 
b/drivers/gpu/drm/amd/pm/swsmu/Makefile
index 6f281990b7b4..7987c6cf849d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/Makefile
+++ b/drivers/gpu/drm/amd/pm/swsmu/Makefile
@@ -22,7 +22,7 @@
 
 AMD_SWSMU_PATH = ../pm/swsmu
 
-SWSMU_LIBS = smu11 smu12
+SWSMU_LIBS = smu11 smu12 smu13
 
 AMD_SWSMU = $(addsuffix /Makefile,$(addprefix $(FULL_AMD_PATH)/pm/swsmu/,$(SWSMU_LIBS)))
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile
new file mode 100644
index ..61ac864ac948
--- /dev/null
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile
@@ -0,0 +1,30 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any 

Re: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

2020-12-07 Thread Zhao, Jiange
[AMD Public Use]

Hi Hawking,

The process is to send the request first, and then to poll and wait 6 seconds.

So the time delta between the 1st request and the 11th request is actually 60
seconds.

Jiange
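
(For reference, a sketch of that arithmetic using the macro values from the
patch below; the helper function is illustrative only:)

#define AI_MAILBOX_POLL_MSG_TIMEDOUT	6000	/* ms: each attempt polls up to 6 s */
#define AI_MAILBOX_POLL_MSG_REP_MAX	11	/* attempts */

/* Span from the start of the 1st to the start of the 11th request:
 * 10 intervals * 6000 ms = 60000 ms = 60 s, as described above. */
static unsigned int vf_reset_request_span_ms(void)
{
	return (AI_MAILBOX_POLL_MSG_REP_MAX - 1) * AI_MAILBOX_POLL_MSG_TIMEDOUT;
}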

From: Zhang, Hawking 
Sent: Tuesday, December 8, 2020 12:16 AM
To: Zhao, Jiange ; amd-gfx@lists.freedesktop.org 

Cc: Chen, Horace ; Zhang, Andy 
Subject: RE: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

[AMD Public Use]

Re - Poll happens every 6 seconds and it will last for 60 seconds.
+   int ret, i = 0;
+   #define AI_MAILBOX_POLL_MSG_REP_MAX 11

The definition doesn't seem to match your description that the polling will
last for 60s. With that fixed, the patch is

Acked-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Zhao, Jiange 
Sent: Monday, December 7, 2020 18:06
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Chen, Horace ; 
Zhang, Andy ; Zhao, Jiange 
Subject: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

From: Jiange Zhao 

In the virtualization case, when one VF sends too many FLR requests, the
hypervisor stops responding to that VF's requests for a long period of time.
This is called the event guard. During this cooling period the guest driver
should wait instead of doing other things; after this period it resumes the
reset process and returns to normal.

Currently, the guest driver waits 12 seconds and returns failure if it gets no
response from the host.

Solution: extend this waiting time in the guest driver and poll for the
response periodically. Polling happens every 6 seconds and lasts for 60 seconds.

v2: change the max repetition times from number to macro.

Signed-off-by: Jiange Zhao 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h |  1 +
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f5ce9a9f4cf5..7767ccca526b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -187,7 +187,16 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,

 static int xgpu_ai_request_reset(struct amdgpu_device *adev)
 {
-	return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+	int ret, i = 0;
+
+	while (i < AI_MAILBOX_POLL_MSG_REP_MAX) {
+		ret = xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+		if (!ret)
+			break;
+		i++;
+	}
+
+	return ret;
 }

 static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 83b453f5d717..50572635d0f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -25,8 +25,9 @@
 #define __MXGPU_AI_H__

 #define AI_MAILBOX_POLL_ACK_TIMEDOUT500
-#define AI_MAILBOX_POLL_MSG_TIMEDOUT   12000
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define AI_MAILBOX_POLL_FLR_TIMEDOUT5000
+#define AI_MAILBOX_POLL_MSG_REP_MAX11

 enum idh_request {
 IDH_REQ_GPU_INIT_ACCESS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 666ed99cc14b..dd5c1e6ce009 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -200,7 +200,16 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,

 static int xgpu_nv_request_reset(struct amdgpu_device *adev)
 {
-	return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+	int ret, i = 0;
+
+	while (i < NV_MAILBOX_POLL_MSG_REP_MAX) {
+		ret = xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+		if (!ret)
+			break;
+		i++;
+	}
+
+	return ret;
 }

 static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 52605e14a1a5..9f5808616174 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -27,6 +27,7 @@
 #define NV_MAILBOX_POLL_ACK_TIMEDOUT500
 #define NV_MAILBOX_POLL_MSG_TIMEDOUT6000
 #define NV_MAILBOX_POLL_FLR_TIMEDOUT5000
+#define NV_MAILBOX_POLL_MSG_REP_MAX11

 enum idh_request {
 IDH_REQ_GPU_INIT_ACCESS = 1,
--
2.25.1


[PATCH] drm/amdgpu: add the missing AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag

2020-12-07 Thread Chen Lei
Add the AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag in amdgpu_gem_create_ioctl.

Signed-off-by: Chen Lei 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7e8265da9..da18aeff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -233,6 +233,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
*data,
  AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
  AMDGPU_GEM_CREATE_VRAM_CLEARED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
  AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
  AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
  AMDGPU_GEM_CREATE_ENCRYPTED))
-- 
2.17.1



RE: [PATCH 1/1] drm/amdgpu: fix sdma instance fw version and feature version init

2020-12-07 Thread Zhou1, Tao
[AMD Public Use]

Reviewed-by: Tao Zhou 

> -Original Message-
> From: amd-gfx  On Behalf Of
> Stanley.Yang
> Sent: Monday, December 7, 2020 2:47 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yang, Stanley 
> Subject: [PATCH 1/1] drm/amdgpu: fix sdma instance fw version and feature
> version init
> 
> each SDMA instance's fw_version and feature_version should be set to the
> right value when the ASIC type isn't between CHIP_SIENNA_CICHLID and
> CHIP_DIMGREY_CAVEFISH
> 
> Signed-off-by: Stanley.Yang 
> Change-Id: I1edbf3e0557d771eb4c0b686fa5299a3b5f26e35
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index cb5a6f1437f8..3fca9fc20dc4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -197,7 +197,7 @@ static int sdma_v5_2_init_microcode(struct
> amdgpu_device *adev)
>   if (err)
>   goto out;
> 
> -	err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]);
> +	err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]);
>   if (err)
>   goto out;
>   }
> --
> 2.17.1
> 


RE: [PATCH 2/2] drm/amd/display: add cursor pitch check

2020-12-07 Thread Cornij, Nikola
[AMD Public Use]

Hi Simon,

It looks to me I'm kinda late to the party to look at your questions under 
https://lists.freedesktop.org/archives/amd-gfx/2020-November/056032.html...

Does the commit below and 
https://lists.freedesktop.org/archives/amd-gfx/2020-December/057048.html mean 
the above issue is now on its way to resolution?

Thanks,

Nikola


-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Wednesday, December 2, 2020 5:25 PM
To: Kazlauskas, Nicholas 
Cc: Deucher, Alexander ; Simon Ser 
; Wentland, Harry ; amd-gfx list 

Subject: Re: [PATCH 2/2] drm/amd/display: add cursor pitch check

On Wed, Dec 2, 2020 at 4:33 PM Kazlauskas, Nicholas 
 wrote:
>
> On 2020-12-02 4:09 p.m., Simon Ser wrote:
> > Replace the width check with a pitch check, which matches DM internals.
> > Add a new check to make sure the pitch (in pixels) matches the width.
> >
> > Signed-off-by: Simon Ser 
> > Cc: Alex Deucher 
> > Cc: Harry Wentland 
> > Cc: Nicholas Kazlauskas 
>
> Series is:
>
> Reviewed-by: Nicholas Kazlauskas 

Applied.  Thanks!

Alex

>
> Regards,
> Nicholas Kazlauskas
>
> > ---
> >   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +++
> >   1 file changed, 15 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 9e328101187e..862a59703060 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -8988,6 +8988,7 @@ static int dm_update_plane_state(struct dc *dc,
> >   struct amdgpu_crtc *new_acrtc;
> >   bool needs_reset;
> >   int ret = 0;
> > + unsigned int pitch;
> >
> >
> >   new_plane_crtc = new_plane_state->crtc; @@ -9021,15 +9022,25 
> > @@ static int dm_update_plane_state(struct dc *dc,
> >   return -EINVAL;
> >   }
> >
> > - switch (new_plane_state->fb->width) {
> > + /* Pitch in pixels */
> > + pitch = new_plane_state->fb->pitches[0] / 
> > + new_plane_state->fb->format->cpp[0];
> > +
> > + if (new_plane_state->fb->width != pitch) {
> > + DRM_DEBUG_ATOMIC("Cursor FB width %d doesn't 
> > match pitch %d",
> > +  new_plane_state->fb->width,
> > +  pitch);
> > + return -EINVAL;
> > + }
> > +
> > + switch (pitch) {
> >   case 64:
> >   case 128:
> >   case 256:
> > - /* FB width is supported by cursor plane */
> > + /* FB pitch is supported by cursor plane */
> >   break;
> >   default:
> > - DRM_DEBUG_ATOMIC("Bad cursor FB width %d\n",
> > -  new_plane_state->fb->width);
> > + DRM_DEBUG_ATOMIC("Bad cursor FB pitch %d 
> > px\n",
> > +  pitch);
> >   return -EINVAL;
> >   }
> >   }
> >
>


[PATCH] drm/amd/display: Drop unnecessary function call

2020-12-07 Thread Rodrigo Siqueira
After refactoring, amdgpu_dm_atomic_commit only invokes
drm_atomic_helper_commit. For this reason, this commit drops
amdgpu_dm_atomic_commit and wires drm_atomic_helper_commit directly into the
atomic_commit hook.

Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c| 16 +---
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a37948f2e596..c89066b1c471 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2212,7 +2212,7 @@ static const struct drm_mode_config_funcs 
amdgpu_dm_mode_funcs = {
.get_format_info = amd_get_format_info,
.output_poll_changed = drm_fb_helper_output_poll_changed,
.atomic_check = amdgpu_dm_atomic_check,
-   .atomic_commit = amdgpu_dm_atomic_commit,
+   .atomic_commit = drm_atomic_helper_commit,
 };
 
 static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = 
{
@@ -8158,20 +8158,6 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct 
drm_crtc_state *crtc_stat
stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state);
 }
 
-static int amdgpu_dm_atomic_commit(struct drm_device *dev,
-  struct drm_atomic_state *state,
-  bool nonblock)
-{
-   /*
-* Add check here for SoC's that support hardware cursor plane, to
-* unset legacy_cursor_update
-*/
-
-   return drm_atomic_helper_commit(dev, state, nonblock);
-
-   /*TODO Handle EINTR, reenable IRQ*/
-}
-
 /**
  * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
  * @state: The atomic state to commit
-- 
2.29.2



Re: [PATCH] drm/amdgpu: make DRM_AMD_DC x86-only again

2020-12-07 Thread Arnd Bergmann
On Mon, Dec 7, 2020 at 11:08 PM 'Nick Desaulniers' via Clang Built
Linux  wrote:
>
> On Mon, Dec 7, 2020 at 1:57 PM Arnd Bergmann  wrote:

> >
> > Right, looking at my latest randconfig logs, I see the same problem on x86
> > builds with clang as well, though I'm not entirely sure which other
> > configuration
> > options are needed to trigger it.
> >
> > So my patch can be disregarded, but I agree this needs a better fix,
> > either in clang or in the dcn driver.
>
> If you could give https://github.com/ClangBuiltLinux/frame-larger-than
> a spin again, I would appreciate any feedback.

I've already tried it, but the tool doesn't seem to like me; I never
get the information out of it that I want. This time it failed because
it could not parse the .o file correctly.

  Arnd


Re: [PATCH] drm/amdgpu: make DRM_AMD_DC x86-only again

2020-12-07 Thread Arnd Bergmann
On Mon, Dec 7, 2020 at 9:50 PM Christian König  wrote:
> Am 07.12.20 um 21:47 schrieb Alex Deucher:
> > On Fri, Dec 4, 2020 at 3:13 AM Arnd Bergmann  wrote:
> >> From: Arnd Bergmann 
> >>
> >> As the DRM_AMD_DC_DCN3_0 code was x86-only and fails to build on
> >> arm64, merging it into DRM_AMD_DC means that the top-level symbol
> >> is now x86-only as well.
> >>
> >> Compilation fails on arm64 with clang-12 with
> >>
> >> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3641:6:
> >>  error: stack frame size of 2416 bytes in function 
> >> 'dml30_ModeSupportAndSystemConfigurationFull' 
> >> [-Werror,-Wframe-larger-than=]
> >> void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib 
> >> *mode_lib)
> >>
> >> I tried to see if the stack usage can be reduced, but this is code
> >> that is described as "This file is gcc-parsable HW gospel, coming
> >> straight from HW engineers." and is written in a way that is inherently
> >> nonportable and not meant to be understood by humans.
> >>
> >> There are probably no non-x86 users of this code, so simplify
> >> the dependency list accordingly.
> > + Daniel, Timothy
> >
> > Others contributed code to enable this on PPC64 and ARM64.
> > Unfortunately, we don't have these platforms to test with within AMD.
> > Does PPC64 have the same stack limitations as ARM64?  Harry, Leo, can
> > you take a look at fixing the stack usage?
>
> This reminds me that I wanted to reply on this.
>
> 2416 is even too much on x86 if you add -Werror :)
>
> So this needs to be fixed anyway.

Right, looking at my latest randconfig logs, I see the same problem on x86
builds with clang as well, though I'm not entirely sure which other
configuration
options are needed to trigger it.

So my patch can be disregarded, but I agree this needs a better fix,
either in clang or in the dcn driver.

   Arnd


Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Luben Tuikov
On 2020-12-04 3:13 a.m., Christian König wrote:
> Thinking more about that I came to the conclusion that the whole 
> approach here isn't correct.
> 
> See even when the job has been completed or canceled we still want to 
> restart the timer.
> 
> The reason for this is that the timer is then not restarted for the 
> current job, but for the next job in the queue.

Got it. I'll make that change in patch 5/5 as this patch, 4/5,
only changes the timer timeout function from void to enum, and
doesn't affect behaviour.

> The only valid reason to not restart the timer is that the whole device 
> was hot plugged and we return -ENODEV here. E.g. what Andrey has been 
> working on.

Yes, perhaps something like DRM_TASK_STATUS_ENODEV.
We can add it now or later when Andrey adds his
hotplug/unplug patches.

Regards,
Luben
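
(For archive readers: a sketch of the status enum this series proposes, with
names taken from the patch text below; the exact definition in
include/drm/gpu_scheduler.h may differ, and DRM_TASK_STATUS_ENODEV is the
possible future addition discussed above.)

enum drm_task_status {
	DRM_TASK_STATUS_COMPLETE,	/* job done or aborted: move it to the done list */
	DRM_TASK_STATUS_ALIVE,		/* job needs more time: restart the timeout timer */
};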

> 
> Regards,
> Christian.
> 
> Am 04.12.20 um 04:17 schrieb Luben Tuikov:
>> The driver's job timeout handler now returns
>> status indicating back to the DRM layer whether
>> the task (job) was successfully aborted or whether
>> more time should be given to the task to complete.
>>
>> Default behaviour as of this patch, is preserved,
>> except in obvious-by-comment case in the Panfrost
>> driver, as documented below.
>>
>> All drivers which make use of the
>> drm_sched_backend_ops' .timedout_job() callback
>> have been accordingly renamed and return the
>> would've-been default value of
>> DRM_TASK_STATUS_ALIVE to restart the task's
>> timeout timer--this is the old behaviour, and
>> is preserved by this patch.
>>
>> In the case of the Panfrost driver, its timedout
>> callback correctly first checks if the job had
>> completed in due time and if so, it now returns
>> DRM_TASK_STATUS_COMPLETE to notify the DRM layer
>> that the task can be moved to the done list, to be
>> freed later. In the other two subsequent checks,
>> the value of DRM_TASK_STATUS_ALIVE is returned, as
>> per the default behaviour.
>>
>> More involved driver solutions can be had
>> in subsequent patches.
>>
>> Signed-off-by: Luben Tuikov 
>> Reported-by: kernel test robot 
>>
>> Cc: Alexander Deucher 
>> Cc: Andrey Grodzovsky 
>> Cc: Christian König 
>> Cc: Daniel Vetter 
>> Cc: Lucas Stach 
>> Cc: Russell King 
>> Cc: Christian Gmeiner 
>> Cc: Qiang Yu 
>> Cc: Rob Herring 
>> Cc: Tomeu Vizoso 
>> Cc: Steven Price 
>> Cc: Alyssa Rosenzweig 
>> Cc: Eric Anholt 
>>
>> v2: Use enum as the status of a driver's job
>>  timeout callback method.
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
>>   drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
>>   drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
>>   drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
>>   drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
>>   drivers/gpu/drm/v3d/v3d_sched.c | 32 +
>>   include/drm/gpu_scheduler.h | 20 +---
>>   7 files changed, 57 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index ff48101bab55..a111326cbdde 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -28,7 +28,7 @@
>>   #include "amdgpu.h"
>>   #include "amdgpu_trace.h"
>>   
>> -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>> +static enum drm_task_status amdgpu_job_timedout(struct drm_sched_job *s_job)
>>   {
>>  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>>  struct amdgpu_job *job = to_amdgpu_job(s_job);
>> @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job 
>> *s_job)
>>  amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) 
>> {
>>  DRM_ERROR("ring %s timeout, but soft recovered\n",
>>s_job->sched->name);
>> -return;
>> +return DRM_TASK_STATUS_ALIVE;
>>  }
>>   
>>  	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
>> @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job 
>> *s_job)
>>   
>>  if (amdgpu_device_should_recover_gpu(ring->adev)) {
>>  amdgpu_device_gpu_recover(ring->adev, job);
>> +return DRM_TASK_STATUS_ALIVE;
>>  } else {
>>  		drm_sched_suspend_timeout(&ring->sched);
>>  if (amdgpu_sriov_vf(adev))
>>  adev->virt.tdr_debug = true;
>> +return DRM_TASK_STATUS_ALIVE;
>>  }
>>   }
>>   
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> index cd46c882269c..c49516942328 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> @@ -82,7 +82,8 @@ static struct dma_fence *etnaviv_sched_run_job(struct 
>> drm_sched_job *sched_job)
>>  return fence;
>>   }
>>   
>> -static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
>> +static enum drm_task_status 

Re: [PATCH] drm/amdgpu: make DRM_AMD_DC x86-only again

2020-12-07 Thread Christian König

Am 07.12.20 um 21:47 schrieb Alex Deucher:

On Fri, Dec 4, 2020 at 3:13 AM Arnd Bergmann  wrote:

From: Arnd Bergmann 

As the DRM_AMD_DC_DCN3_0 code was x86-only and fails to build on
arm64, merging it into DRM_AMD_DC means that the top-level symbol
is now x86-only as well.

Compilation fails on arm64 with clang-12 with

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3641:6:
 error: stack frame size of 2416 bytes in function 
'dml30_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than=]
void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib 
*mode_lib)

I tried to see if the stack usage can be reduced, but this is code
that is described as "This file is gcc-parsable HW gospel, coming
straight from HW engineers." and is written in a way that is inherently
nonportable and not meant to be understood by humans.

There are probably no non-x86 users of this code, so simplify
the dependency list accordingly.

+ Daniel, Timothy

Others contributed code to enable this on PPC64 and ARM64.
Unfortunately, we don't have these platforms to test with within AMD.
Does PPC64 have the same stack limitations as ARM64?  Harry, Leo, can
you take a look at fixing the stack usage?


This reminds me that I wanted to reply on this.

2416 is even too much on x86 if you add -Werror :)

So this needs to be fixed anyway.

Christian.



Thanks,

Alex


Fixes: 20f2ffe50472 ("drm/amdgpu: fold CONFIG_DRM_AMD_DC_DCN3* into 
CONFIG_DRM_AMD_DC_DCN (v3)")
Signed-off-by: Arnd Bergmann 
---
  drivers/gpu/drm/amd/display/Kconfig | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig
index 797b5d4b43e5..54aa50d4deba 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
 bool "AMD DC - Enable new display engine"
 default y
 select SND_HDA_COMPONENT if SND_HDA_CORE
-	select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+	select DRM_AMD_DC_DCN if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
 help
   Choose this option if you want to use the new display engine
   support for AMDGPU. This adds required support for Vega and
--
2.27.0



Re: [PATCH] drm/amdgpu: make DRM_AMD_DC x86-only again

2020-12-07 Thread Alex Deucher
On Fri, Dec 4, 2020 at 3:13 AM Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> As the DRM_AMD_DC_DCN3_0 code was x86-only and fails to build on
> arm64, merging it into DRM_AMD_DC means that the top-level symbol
> is now x86-only as well.
>
> Compilation fails on arm64 with clang-12 with
>
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3641:6:
>  error: stack frame size of 2416 bytes in function 
> 'dml30_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than=]
> void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib 
> *mode_lib)
>
> I tried to see if the stack usage can be reduced, but this is code
> that is described as "This file is gcc-parsable HW gospel, coming
> straight from HW engineers." and is written in a way that is inherently
> nonportable and not meant to be understood by humans.
>
> There are probably no non-x86 users of this code, so simplify
> the dependency list accordingly.

+ Daniel, Timothy

Others contributed code to enable this on PPC64 and ARM64.
Unfortunately, we don't have these platforms to test with within AMD.
Does PPC64 have the same stack limitations as ARM64?  Harry, Leo, can
you take a look at fixing the stack usage?

Thanks,

Alex

>
> Fixes: 20f2ffe50472 ("drm/amdgpu: fold CONFIG_DRM_AMD_DC_DCN3* into 
> CONFIG_DRM_AMD_DC_DCN (v3)")
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/gpu/drm/amd/display/Kconfig | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/Kconfig 
> b/drivers/gpu/drm/amd/display/Kconfig
> index 797b5d4b43e5..54aa50d4deba 100644
> --- a/drivers/gpu/drm/amd/display/Kconfig
> +++ b/drivers/gpu/drm/amd/display/Kconfig
> @@ -6,7 +6,7 @@ config DRM_AMD_DC
> bool "AMD DC - Enable new display engine"
> default y
> select SND_HDA_COMPONENT if SND_HDA_CORE
> -	select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
> +	select DRM_AMD_DC_DCN if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
> help
>   Choose this option if you want to use the new display engine
>   support for AMDGPU. This adds required support for Vega and
> --
> 2.27.0
>


Re: [PATCH 2/2] drm/amd/display: add S/G support for Vangogh

2020-12-07 Thread Kazlauskas, Nicholas

On 2020-12-07 3:03 p.m., roman...@amd.com wrote:

From: Roman Li 

[Why]
Scatter/gather feature is supported on Vangogh.

[How]
Add GTT domain support for Vangogh to enable
display buffers in system memory.

Signed-off-by: Roman Li 


Series is:

Reviewed-by: Nicholas Kazlauskas 

Regards,
Nicholas Kazlauskas


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 63401bc8f37b..a638709e9c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -526,6 +526,7 @@ uint32_t amdgpu_display_supported_domains(struct 
amdgpu_device *adev,
domain |= AMDGPU_GEM_DOMAIN_GTT;
break;
case CHIP_RENOIR:
+   case CHIP_VANGOGH:
domain |= AMDGPU_GEM_DOMAIN_GTT;
break;
  





[PATCH 2/2] drm/amd/display: add S/G support for Vangogh

2020-12-07 Thread Roman.Li
From: Roman Li 

[Why]
Scatter/gather feature is supported on Vangogh.

[How]
Add GTT domain support for Vangogh to enable
display buffers in system memory.

Signed-off-by: Roman Li 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 63401bc8f37b..a638709e9c92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -526,6 +526,7 @@ uint32_t amdgpu_display_supported_domains(struct 
amdgpu_device *adev,
domain |= AMDGPU_GEM_DOMAIN_GTT;
break;
case CHIP_RENOIR:
+   case CHIP_VANGOGH:
domain |= AMDGPU_GEM_DOMAIN_GTT;
break;
 
-- 
2.17.1



[PATCH 1/2] drm/amd/display: setup system context for APUs

2020-12-07 Thread Roman.Li
From: Roman Li 

[Why]
Scatter/gather is an APU feature,
but in DM it is limited to Renoir only.
Now we need it for Vangogh.

[How]
Apply system context setup in dm_init to all APUs.

Signed-off-by: Roman Li 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a37948f2e596..fb2b23b696c2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1093,7 +1093,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
dc_hardware_init(adev->dm.dc);
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-   if (adev->asic_type == CHIP_RENOIR) {
+   if (adev->apu_flags) {
struct dc_phy_addr_space_config pa_config;
 
mmhub_read_system_context(adev, _config);
-- 
2.17.1



Re: [PATCH 5/5] drm/sched: Make use of a "done" list (v2)

2020-12-07 Thread Luben Tuikov
On 2020-12-04 3:16 a.m., Christian König wrote:
> Am 04.12.20 um 04:17 schrieb Luben Tuikov:
>> The drm_sched_job_done() callback now moves done
>> jobs from the pending list to a "done" list.
>>
>> In drm_sched_job_timeout, make use of the status
>> returned by a GPU driver job timeout handler to
>> decide whether to leave the oldest job in the
>> pending list, or to send it off to the done list.
>> If a driver's job timeout callback returns a
>> status that that job is done, it is added to the
>> done list and the done thread woken up. If that
>> job needs more time, it is left on the pending
>> list and the timeout timer restarted.
>>
>> The idea is that a GPU driver can check the IP to
>> which the passed-in job belongs to and determine
>> whether the IP is alive and well, or if it needs
>> more time to complete this job and perhaps others
>> also executing on it.
>>
>> In drm_sched_job_timeout(), the main scheduler
>> thread is now parked, before calling a driver's
>> timeout_job callback, so as to not compete pushing
>> jobs down to the GPU while the recovery method is
>> taking place.
>>
>> Eliminate the polling mechanism of picking out done
>> jobs from the pending list, i.e. eliminate
>> drm_sched_get_cleanup_job().
>>
>> This also eliminates the eldest job disappearing
>> from the pending list, while the driver timeout
>> handler is called.
>>
>> Various other optimizations to the GPU scheduler
>> and job recovery are possible with this format.
>>
>> Signed-off-by: Luben Tuikov 
>>
>> Cc: Alexander Deucher 
>> Cc: Andrey Grodzovsky 
>> Cc: Christian König 
>> Cc: Daniel Vetter 
>> Cc: Lucas Stach 
>> Cc: Russell King 
>> Cc: Christian Gmeiner 
>> Cc: Qiang Yu 
>> Cc: Rob Herring 
>> Cc: Tomeu Vizoso 
>> Cc: Steven Price 
>> Cc: Alyssa Rosenzweig 
>> Cc: Eric Anholt 
>>
>> v2: Dispell using a done thread, so as to keep
>>  the cache hot on the same processor.
>> ---
>>   drivers/gpu/drm/scheduler/sched_main.c | 247 +
>>   include/drm/gpu_scheduler.h|   4 +
>>   2 files changed, 134 insertions(+), 117 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index b9876cad94f2..d77180b44998 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -164,7 +164,9 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
>>* drm_sched_job_done - complete a job
>>* @s_job: pointer to the job which is done
>>*
>> - * Finish the job's fence and wake up the worker thread.
>> + * Move the completed task to the done list,
>> + * signal the its fence to mark it finished,
>> + * and wake up the worker thread.
>>*/
>>   static void drm_sched_job_done(struct drm_sched_job *s_job)
>>   {
>> @@ -176,9 +178,14 @@ static void drm_sched_job_done(struct drm_sched_job 
>> *s_job)
>>   
>>  trace_drm_sched_process_job(s_fence);
>>   
>> +	spin_lock(&sched->job_list_lock);
>> +	list_move(&s_job->list, &sched->done_list);
>> +	spin_unlock(&sched->job_list_lock);
>> +
> 
> That is racy: as soon as the spinlock is dropped, the job and with it the
> s_fence might have been destroyed.

Yeah, I had it the other way around (the correct way) and changed it--not
sure why. I'll revert it back. Thanks for catching this.

Regards,
Luben

> 
>>  	dma_fence_get(&s_fence->finished);
>>  	drm_sched_fence_finished(s_fence);
>>  	dma_fence_put(&s_fence->finished);
> 
> In other words this here needs to come first.
> 
> Regards,
> Christian.
> 
>> +
>>  	wake_up_interruptible(&sched->wake_up_worker);
>>   }
>>   
>> @@ -309,6 +316,37 @@ static void drm_sched_job_begin(struct drm_sched_job 
>> *s_job)
>>  spin_unlock(>job_list_lock);
>>   }
>>   
>> +/** drm_sched_job_timeout -- a timer timeout occurred
>> + * @work: pointer to work_struct
>> + *
>> + * First, park the scheduler thread whose IP timed out,
>> + * so that we don't race with the scheduler thread pushing
>> + * jobs down the IP as we try to investigate what
>> + * happened and give drivers a chance to recover.
>> + *
>> + * Second, take the fist job in the pending list
>> + * (oldest), leave it in the pending list and call the
>> + * driver's timer timeout callback to find out what
>> + * happened, passing this job as the suspect one.
>> + *
>> + * The driver may return DRM_TASK_STATUS_COMPLETE,
>> + * which means the task is not in the IP(*) and we move
>> + * it to the done list to free it.
>> + *
>> + * (*) A reason for this would be, say, that the job
>> + * completed in due time, or the driver has aborted
>> + * this job using driver specific methods in the
>> + * timedout_job callback and has now removed it from
>> + * the hardware.
>> + *
>> + * Or, the driver may return DRM_TASK_STATUS_ALIVE, to
>> + * indicate that it had inquired about this job, and it
>> + * has verified that this job is alive and well, and
>> + * that the DRM layer should give this task more time
>> + * to complete. In this case, we restart 

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Andrey Grodzovsky


On 12/7/20 2:19 PM, Christian König wrote:

Am 07.12.20 um 20:09 schrieb Andrey Grodzovsky:


On 12/7/20 1:04 PM, Christian König wrote:

Am 07.12.20 um 17:00 schrieb Andrey Grodzovsky:


On 12/7/20 6:13 AM, Christian König wrote:

Am 04.12.20 um 16:10 schrieb Andrey Grodzovsky:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole 
approach here isn't correct.


See even when the job has been completed or canceled we still want to 
restart the timer.


The reason for this is that the timer is then not restarted for the 
current job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole device 
was hot plugged and we return -ENODEV here. E.g. what Andrey has been 
working on.



We discussed this with Luben offline a few days ago but came to the
conclusion that for the next job the timer restart in drm_sched_job_begin
should do the work, no?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts the 
timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms the
timer; I don't see how it is related to whether the HW was idle before?


It doesn't rearm the timer. It initially starts the timer when the hardware 
is idle.



It schedules delayed work for the timer task if the ring mirror list is not
empty. Am I missing something?



Ok, let me explain from the beginning.

drm_sched_start_timeout() initially starts the timer; it does NOT rearm it!
When the timer is already running it doesn't have any effect at all.
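
(A sketch of why a second call has no effect: drm_sched_start_timeout() is
built on schedule_delayed_work()/queue_delayed_work(), which does nothing and
returns false when the work item is already pending. Roughly, per the
5.10-era scheduler; field names may differ:)

static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->ring_mirror_list))
		/* no-op if work_tdr is already queued; the timer is only
		 * (re)armed when the work item is not pending */
		schedule_delayed_work(&sched->work_tdr, sched->timeout);
}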


In the sense that delayed work cannot be enqueued while another instance is
still in the queue, I agree.

I forgot about this in the context of drm_sched_start_timeout.




When a job completes drm_sched_get_cleanup_job() cancels the timer, frees the 
job and then starts a new timer for the engine.


When a timeout happens the job is either canceled or given some extra time by
putting it back on the pending list.


When the job is canceled the timer must be restarted for the next job, because 
drm_sched_job_begin() was already called long ago.



Now I get it. The next job might have called (and probably did call)
drm_sched_job_begin while the previous timer work (the currently executing
one) was still in the workqueue, so we cannot count on it to actually have
restarted the timer, and so we must do it.






When the job gets some extra time we should also restart the timer.



Same as above.

Thanks for clarifying this.

Andrey




The only case when the timer should not be restarted is when the device was 
hotplugged and is completely gone now.


I think the right approach to stop this messing with the ring mirror list is 
to avoid using the job altogether for recovery.


What we should do instead is to put the recovery information on the scheduler 
fence, because that is the object which stays alive after pushing the job to 
the hardware.






Christian.



Andrey




Christian.



Andrey




The function should probably be renamed to drm_sched_job_pushed() because 
it doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

Am 04.12.20 um 04:17 schrieb Luben Tuikov:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour as of this patch, is preserved,
except in obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been accordingly renamed and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Christian König

Am 07.12.20 um 20:09 schrieb Andrey Grodzovsky:


On 12/7/20 1:04 PM, Christian König wrote:

On 12/7/20 5:00 PM, Andrey Grodzovsky wrote:


On 12/7/20 6:13 AM, Christian König wrote:

On 12/4/20 4:10 PM, Andrey Grodzovsky wrote:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole 
approach here isn't correct.


See, even when the job has been completed or canceled, we still
want to restart the timer.


The reason for this is that the timer is then not restarted for 
the current job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole 
device was hot plugged and we return -ENODEV here. E.g. what 
Andrey has been working on.



We discussed this with Luben offline a few days ago but came to the
conclusion that for the next job the timer restart in
drm_sched_job_begin should do the work, no?


Nope, drm_sched_job_begin() pushes the job to the hardware and 
starts the timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms
the timer; I don't see how that is related to whether the HW was idle
before?


It doesn't rearm the timer. It initially starts the timer when the 
hardware is idle.



It schedules delayed work for the timer task if the ring mirror list is not
empty. Am I missing something?
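
[Aside: the function both sides are describing, sketched from the code of
this era:]

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
        struct drm_gpu_scheduler *sched = s_job->sched;

        spin_lock(&sched->job_list_lock);
        list_add_tail(&s_job->node, &sched->ring_mirror_list);
        /* no-op if the timeout timer is already running */
        drm_sched_start_timeout(sched);
        spin_unlock(&sched->job_list_lock);
}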



Ok, let me explain from the beginning.

drm_sched_start_timeout() initially starts the timer; it does NOT rearm
it! When the timer is already running it doesn't have any effect at all.


When a job completes, drm_sched_get_cleanup_job() cancels the timer,
frees the job and then starts a new timer for the engine.
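
[Aside: an abridged sketch of the completion path just described, per the
scheduler code of this era:]

static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *job;

        /* Don't free jobs while the timeout handler is running. */
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            !cancel_delayed_work(&sched->work_tdr))
                return NULL;

        spin_lock(&sched->job_list_lock);
        job = list_first_entry_or_null(&sched->ring_mirror_list,
                                       struct drm_sched_job, node);
        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* pop the finished job; the caller frees it */
                list_del_init(&job->node);
        } else {
                job = NULL;
                /* (re)arm the timeout for the job now at the head */
                drm_sched_start_timeout(sched);
        }
        spin_unlock(&sched->job_list_lock);

        return job;
}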


When a timeout happens the job is either canceled or given some extra
time by putting it back on the pending list.


When the job is canceled the timer must be restarted for the next job, 
because drm_sched_job_begin() was already called long ago.


When the job gets some extra time we should also restart the timer.

The only case when the timer should not be restarted is when the device 
was hotplugged and is completely gone now.


I think the right approach to stop this messing with the ring mirror 
list is to avoid using the job altogether for recovery.


What we should do instead is to put the recovery information on the 
scheduler fence, because that is the object which stays alive after 
pushing the job to the hardware.


Christian.



Andrey




Christian.



Andrey




The function should probably be renamed to drm_sched_job_pushed() 
because it doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

On 12/4/20 4:17 AM, Luben Tuikov wrote:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour, as of this patch, is preserved,
except in the obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been updated accordingly and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 
+

  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status 

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Andrey Grodzovsky


On 12/7/20 1:04 PM, Christian König wrote:

On 12/7/20 5:00 PM, Andrey Grodzovsky wrote:


On 12/7/20 6:13 AM, Christian König wrote:

On 12/4/20 4:10 PM, Andrey Grodzovsky wrote:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole approach 
here isn't correct.


See, even when the job has been completed or canceled, we still want to
restart the timer.


The reason for this is that the timer is then not restarted for the 
current job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole device 
was hot plugged and we return -ENODEV here. E.g. what Andrey has been 
working on.



We discussed this with Luben offline a few days ago but came to the conclusion
that for the next job the timer restart in drm_sched_job_begin
should do the work, no?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts the 
timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms the
timer; I don't see how that is related to whether the HW was idle before?


It doesn't rearm the timer. It initially starts the timer when the hardware is 
idle.



It schedules delayed work for the timer task if the ring mirror list is not
empty. Am I missing something?


Andrey




Christian.



Andrey




The function should probably be renamed to drm_sched_job_pushed() because it 
doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

On 12/4/20 4:17 AM, Luben Tuikov wrote:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour, as of this patch, is preserved,
except in the obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been updated accordingly and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 +
  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct drm_sched_job 
*s_job)

  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job 
*s_job)
  amdgpu_ring_soft_recovery(ring, job->vmid, 
s_job->s_fence->parent)) {

  DRM_ERROR("ring %s timeout, but soft recovered\n",
    s_job->sched->name);
-    return;
+    return DRM_TASK_STATUS_ALIVE;
  }
    amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job 
*s_job)

    if (amdgpu_device_should_recover_gpu(ring->adev)) {
  amdgpu_device_gpu_recover(ring->adev, job);
+    return DRM_TASK_STATUS_ALIVE;
  } else {
   drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))
  adev->virt.tdr_debug = true;
+    return DRM_TASK_STATUS_ALIVE;
  }
  }
  diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c

index cd46c882269c..c49516942328 100644
--- 

Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Christian König

On 12/7/20 5:00 PM, Andrey Grodzovsky wrote:


On 12/7/20 6:13 AM, Christian König wrote:

On 12/4/20 4:10 PM, Andrey Grodzovsky wrote:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole 
approach here isn't correct.


See, even when the job has been completed or canceled, we still want
to restart the timer.


The reason for this is that the timer is then not restarted for the 
current job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole 
device was hot plugged and we return -ENODEV here. E.g. what Andrey 
has been working on.



We discussed this with Luben offline a few days ago but came to the
conclusion that for the next job the timer restart in
drm_sched_job_begin should do the work, no?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts 
the timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms
the timer; I don't see how that is related to whether the HW was idle
before?


It doesn't rearm the timer. It initially starts the timer when the 
hardware is idle.


Christian.



Andrey




The function should probably be renamed to drm_sched_job_pushed() 
because it doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

On 12/4/20 4:17 AM, Luben Tuikov wrote:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour, as of this patch, is preserved,
except in the obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been updated accordingly and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 
+

  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct 
drm_sched_job *s_job)

  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  amdgpu_ring_soft_recovery(ring, job->vmid, 
s_job->s_fence->parent)) {

  DRM_ERROR("ring %s timeout, but soft recovered\n",
    s_job->sched->name);
-    return;
+    return DRM_TASK_STATUS_ALIVE;
  }
  amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct 
drm_sched_job *s_job)

    if (amdgpu_device_should_recover_gpu(ring->adev)) {
  amdgpu_device_gpu_recover(ring->adev, job);
+    return DRM_TASK_STATUS_ALIVE;
  } else {
  drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))
  adev->virt.tdr_debug = true;
+    return DRM_TASK_STATUS_ALIVE;
  }
  }
  diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c

index cd46c882269c..c49516942328 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -82,7 +82,8 @@ static struct dma_fence 

Re: [bug report] drm/amd/display: use FB pitch to fill dc_cursor_attributes

2020-12-07 Thread Dan Carpenter
On Mon, Dec 07, 2020 at 02:53:28PM +, Simon Ser wrote:
> Hi,
> 
> On Monday, December 7th, 2020 at 3:51 PM, Dan Carpenter 
>  wrote:
> 
> > The patch adds some new unchecked dereferences.
> 
> The `if (!position.enable)` check above should ensure the dereference
> isn't unchecked.

Yeah.  You're correct.  Thanks for looking into this.

regards,
dan carpenter

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/pm: update driver if version for dimgrey_cavefish

2020-12-07 Thread Alex Deucher
On Mon, Dec 7, 2020 at 1:07 AM Tao Zhou  wrote:
>
> Per PMFW 59.16.0.
>
> Signed-off-by: Tao Zhou 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h 
> b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
> index c1cb472f8f0f..e5aa0725147c 100644
> --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
> +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
> @@ -33,7 +33,7 @@
>  #define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3B
>  #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xC
>  #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x02
> -#define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xD
> +#define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF
>
>  /* MP Apertures */
>  #define MP0_Public 0x0380
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/1] drm/amdgpu: fix sdma instance fw version and feature version init

2020-12-07 Thread Alex Deucher
On Mon, Dec 7, 2020 at 1:47 AM Stanley.Yang  wrote:
>
> each SDMA instance's fw_version and feature_version
> should be set to the right value when the ASIC type isn't
> between CHIP_SIENNA_CICHLID and CHIP_DIMGREY_CAVEFISH
>
> Signed-off-by: Stanley.Yang 
> Change-Id: I1edbf3e0557d771eb4c0b686fa5299a3b5f26e35

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index cb5a6f1437f8..3fca9fc20dc4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -197,7 +197,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device 
> *adev)
> if (err)
> goto out;
>
> -   err = 
> sdma_v5_2_init_inst_ctx(>sdma.instance[0]);
> +   err = 
> sdma_v5_2_init_inst_ctx(>sdma.instance[i]);
> if (err)
> goto out;
> }
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

2020-12-07 Thread Zhang, Hawking
[AMD Public Use]

Re - Poll happens every 6 seconds and it will last for 60 seconds.
+   int ret, i = 0;
+   #define AI_MAILBOX_POLL_MSG_REP_MAX 11

The definition doesn't seem to match your description that the polling will
last for 60 seconds. With that fixed, the patch is

Acked-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Zhao, Jiange  
Sent: Monday, December 7, 2020 18:06
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Chen, Horace ; 
Zhang, Andy ; Zhao, Jiange 
Subject: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

From: Jiange Zhao 

In the virtualization case, when one VF is sending too many FLR requests, the
hypervisor would stop responding to this VF's requests for a long period of
time. This is called event guard. During this cooling period, the guest driver
should wait instead of doing other things. After this period of time, the
guest driver would resume the reset process and return to normal.

Currently, the guest driver waits 12 seconds and returns failure if it doesn't
get a response from the host.

Solution: extend this waiting time in the guest driver and poll for the
response periodically. Polling happens every 6 seconds and lasts for up to 60
seconds.

v2: change the max repetition times from number to macro.

Signed-off-by: Jiange Zhao 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ++-  
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |  3 ++-  
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ++-  
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h |  1 +
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f5ce9a9f4cf5..7767ccca526b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -187,7 +187,16 @@ static int xgpu_ai_send_access_requests(struct 
amdgpu_device *adev,
 
 static int xgpu_ai_request_reset(struct amdgpu_device *adev)  {
-   return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < AI_MAILBOX_POLL_MSG_REP_MAX) {
+   ret = xgpu_ai_send_access_requests(adev, 
IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }
 
 static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev, diff 
--git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 83b453f5d717..50572635d0f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -25,8 +25,9 @@
 #define __MXGPU_AI_H__
 
 #define AI_MAILBOX_POLL_ACK_TIMEDOUT   500
-#define AI_MAILBOX_POLL_MSG_TIMEDOUT   12000
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define AI_MAILBOX_POLL_FLR_TIMEDOUT   5000
+#define AI_MAILBOX_POLL_MSG_REP_MAX11
 
 enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 666ed99cc14b..dd5c1e6ce009 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -200,7 +200,16 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
 
 static int xgpu_nv_request_reset(struct amdgpu_device *adev)  {
-   return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < NV_MAILBOX_POLL_MSG_REP_MAX) {
+   ret = xgpu_nv_send_access_requests(adev, 
IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }
 
 static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev, diff 
--git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 52605e14a1a5..9f5808616174 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -27,6 +27,7 @@
 #define NV_MAILBOX_POLL_ACK_TIMEDOUT   500
 #define NV_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define NV_MAILBOX_POLL_FLR_TIMEDOUT   5000
+#define NV_MAILBOX_POLL_MSG_REP_MAX11
 
 enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
--
2.25.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 14/17] drm/amd/display: Enable gpu_vm_support for dcn3.01

2020-12-07 Thread Brol, Eryk
[AMD Official Use Only - Internal Distribution Only]

Thanks for pointing that out!

Eryk

From: Deucher, Alexander 
Sent: Monday, December 7, 2020 9:40 AM
To: Brol, Eryk ; amd-gfx@lists.freedesktop.org 

Cc: Liu, Charlene ; Li, Sun peng (Leo) 
; Wentland, Harry ; Zhuo, Qingqing 
; Siqueira, Rodrigo ; Pillai, 
Aurabindo ; Sun, Yongqiang ; 
Lakha, Bhawanpreet ; R, Bindu 
Subject: Re: [PATCH 14/17] drm/amd/display: Enable gpu_vm_support for dcn3.01


[AMD Official Use Only - Internal Distribution Only]

We've dropped the CONFIG_DRM_AMD_DC_DCN3* kconfig options recently.

Alex
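
[Aside: in other words, with the kconfig guard dropped the hunk would
reduce to the unguarded case; hypothetical sketch:]

        case CHIP_VANGOGH:
                init_data.flags.gpu_vm_support = true;
                break;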


From: amd-gfx  on behalf of Eryk Brol 

Sent: Friday, December 4, 2020 4:28 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Liu, Charlene ; Brol, Eryk ; Li, 
Sun peng (Leo) ; Wentland, Harry ; 
Zhuo, Qingqing ; Siqueira, Rodrigo 
; Pillai, Aurabindo ; Sun, 
Yongqiang ; Lakha, Bhawanpreet 
; R, Bindu 
Subject: [PATCH 14/17] drm/amd/display: Enable gpu_vm_support for dcn3.01

From: Charlene Liu 

[Why]
dcn3_01 supports gpu_vm, but this is not enabled in amdgpu_dm

Signed-off-by: Charlene Liu 
Reviewed-by: Yongqiang Sun 
Acked-by: Eryk Brol 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 59f738008734..53a7cb21f603 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1035,6 +1035,11 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
 init_data.flags.disable_dmcu = true;
 break;
+#if defined(CONFIG_DRM_AMD_DC_DCN3_01)
+   case CHIP_VANGOGH:
+   init_data.flags.gpu_vm_support = true;
+   break;
+#endif
 default:
 break;
 }
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 00/17] DC Patches Dec 7, 2020

2020-12-07 Thread Harry Wentland

AMD\ramini (1):
   drm/amd/display: Set FixRate bit in VSIF V3


Please ask Reza to configure his git with his real name.

Harry


On 2020-12-04 4:28 p.m., Eryk Brol wrote:

This DC patchset brings improvements in multiple areas.
In summary, we highlight:

* Fixes in MST, Compliance, HDCP, audio;
* Enhancements in VSIF;
* Improvements in seamless boot, DPG;



AMD\ramini (1):
   drm/amd/display: Set FixRate bit in VSIF V3

Anthony Koo (1):
   drm/amd/display: [FW Promotion] Release 0.0.45

Aric Cyr (1):
   drm/amd/display: 3.2.115

Charlene Liu (1):
   drm/amd/display: Enable gpu_vm_support for dcn3.01

Chris Park (1):
   drm/amd/display: Prevent bandwidth overflow

Dmytro Laktyushkin (1):
   drm/amd/display: Expose clk_mgr functions for reuse

Jing Zhou (1):
   drm/amd/display: Set default bits per channel

John Wu (1):
   drm/amd/display: Don't check seamless boot in power down HW by timeout

Judy Cai (1):
   drm/amd/display: Change to IMMEDIATE mode from FRAME mode

Michael Strauss (1):
   drm/amd/display: Revert DCN2.1 dram_clock_change_latency update

Qingqing Zhuo (2):
   drm/amd/display: NULL pointer error during compliance test
   drm/amd/display: Only one display lights up while using MST hub

Reza Amini (1):
   drm/amd/display: Implement VSIF V3 extended refresh rate feature

Roy Chan (1):
   drm/amd/display: Fixed the audio noise during mode switching with HDCP
 mode on

Sung Lee (1):
   drm/amd/display: Add wm table for Renoir

Wesley Chalmers (1):
   drm/amd/display: Use provided offset for DPG generation

Wyatt Wood (1):
   drm/amd/display: Add support for runtime feature detection command

  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  37 +--
  .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c |  93 +++-
  .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  |   2 +-
  .../display/dc/clk_mgr/dcn301/vg_clk_mgr.c|   8 +-
  .../display/dc/clk_mgr/dcn301/vg_clk_mgr.h|  10 ++
  drivers/gpu/drm/amd/display/dc/core/dc_link.c |  18 ++-
  .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  17 ++-
  drivers/gpu/drm/amd/display/dc/dc.h   |   2 +-
  drivers/gpu/drm/amd/display/dc/dc_link.h  |   4 +
  .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |   5 +-
  .../drm/amd/display/dc/dcn21/dcn21_resource.c |   2 +-
  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|   2 +-
  .../gpu/drm/amd/display/dc/dcn30/dcn30_vpg.c  |  62 +--
  .../gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h  |  38 ++-
  drivers/gpu/drm/amd/display/dmub/dmub_srv.h   |   6 +
  .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  17 ++-
  .../gpu/drm/amd/display/dmub/src/dmub_srv.c   |  29 +
  .../amd/display/modules/freesync/freesync.c   | 104 +++---
  18 files changed, 367 insertions(+), 89 deletions(-)



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Andrey Grodzovsky


On 12/7/20 6:13 AM, Christian König wrote:

On 12/4/20 4:10 PM, Andrey Grodzovsky wrote:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole approach 
here isn't correct.


See, even when the job has been completed or canceled, we still want to
restart the timer.


The reason for this is that the timer is then not restarted for the current 
job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole device was 
hot plugged and we return -ENODEV here. E.g. what Andrey has been working on.



We discussed this with Luben offline a few days ago but came to the conclusion
that for the next job the timer restart in drm_sched_job_begin should do the
work, no?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts the 
timeout in case the hardware was idle before.



drm_sched_job_begin only adds the job to the ring mirror list and rearms the timer;
I don't see how that is related to whether the HW was idle before?


Andrey




The function should probably be renamed to drm_sched_job_pushed() because it 
doesn't begin the execution in any way.


Christian.








Andrey




Regards,
Christian.

On 12/4/20 4:17 AM, Luben Tuikov wrote:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour, as of this patch, is preserved,
except in the obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been updated accordingly and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 +
  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct drm_sched_job *s_job)
  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
  amdgpu_ring_soft_recovery(ring, job->vmid, 
s_job->s_fence->parent)) {

  DRM_ERROR("ring %s timeout, but soft recovered\n",
    s_job->sched->name);
-    return;
+    return DRM_TASK_STATUS_ALIVE;
  }
    amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job 
*s_job)

    if (amdgpu_device_should_recover_gpu(ring->adev)) {
  amdgpu_device_gpu_recover(ring->adev, job);
+    return DRM_TASK_STATUS_ALIVE;
  } else {
  drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))
  adev->virt.tdr_debug = true;
+    return DRM_TASK_STATUS_ALIVE;
  }
  }
  diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c

index cd46c882269c..c49516942328 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -82,7 +82,8 @@ static struct dma_fence *etnaviv_sched_run_job(struct 
drm_sched_job *sched_job)

  return fence;
  }
  -static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
+static enum drm_task_status 

Re: [bug report] drm/amd/display: use FB pitch to fill dc_cursor_attributes

2020-12-07 Thread Simon Ser
Hi,

On Monday, December 7th, 2020 at 3:51 PM, Dan Carpenter 
 wrote:

> The patch adds some new unchecked dereferences.

The `if (!position.enable)` check above should ensure the dereference
isn't unchecked.

Simon
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[bug report] drm/amd/display: use FB pitch to fill dc_cursor_attributes

2020-12-07 Thread Dan Carpenter
Hello Simon Ser,

This is a semi-automatic email about new static checker warnings.

The patch 03a663673063: "drm/amd/display: use FB pitch to fill 
dc_cursor_attributes" from Dec 2, 2020, leads to the following Smatch 
complaint:

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:7438 
handle_cursor_update()
error: we previously assumed 'afb' could be null (see line 7397)

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c
  7389  static void handle_cursor_update(struct drm_plane *plane,
  7390   struct drm_plane_state 
*old_plane_state)
  7391  {
  7392  struct amdgpu_device *adev = drm_to_adev(plane->dev);
  7393  struct amdgpu_framebuffer *afb = 
to_amdgpu_framebuffer(plane->state->fb);
   
^^
If "plane->state->fb" is NULL then "afb" is NULL.


  7394  struct drm_crtc *crtc = afb ? plane->state->crtc : 
old_plane_state->crtc;
^^^
Checked for NULL.

  7395  struct dm_crtc_state *crtc_state = crtc ? 
to_dm_crtc_state(crtc->state) : NULL;
  7396  struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
  7397  uint64_t address = afb ? afb->address : 0;
  7398  struct dc_cursor_position position;
  7399  struct dc_cursor_attributes attributes;
  7400  int ret;
  7401  
  7402  if (!plane->state->fb && !old_plane_state->fb)
 
These aren't allowed to both be NULL

  7403  return;
  7404  
  7405  DRM_DEBUG_DRIVER("%s: crtc_id=%d with size %d to %d\n",
  7406   __func__,
  7407   amdgpu_crtc->crtc_id,
  7408   plane->state->crtc_w,
  7409   plane->state->crtc_h);
  7410  
  7411  ret = get_cursor_position(plane, crtc, );
  7412  if (ret)
  7413  return;
  7414  
  7415  if (!position.enable) {
  7416  /* turn off cursor */
  7417  if (crtc_state && crtc_state->stream) {
  7418  mutex_lock(>dm.dc_lock);
  7419  
dc_stream_set_cursor_position(crtc_state->stream,
  7420);
  7421  mutex_unlock(>dm.dc_lock);
  7422  }
  7423  return;
  7424  }
  7425  
  7426  amdgpu_crtc->cursor_width = plane->state->crtc_w;
  7427  amdgpu_crtc->cursor_height = plane->state->crtc_h;
  7428  
  7429  memset(, 0, sizeof(attributes));
  7430  attributes.address.high_part = upper_32_bits(address);
  7431  attributes.address.low_part  = lower_32_bits(address);
  7432  attributes.width = plane->state->crtc_w;
  7433  attributes.height= plane->state->crtc_h;
  7434  attributes.color_format  = 
CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA;
  7435  attributes.rotation_angle= 0;
  7436  attributes.attribute_flags.value = 0;
  7437  
  7438  attributes.pitch = afb->base.pitches[0] / 
afb->base.format->cpp[0];
   ^  ^
The patch adds some new unchecked dereferences.

  7439  
  7440  if (crtc_state->stream) {

regards,
dan carpenter
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 14/17] drm/amd/display: Enable gpu_vm_support for dcn3.01

2020-12-07 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

We've dropped the CONFIG_DRM_AMD_DC_DCN3* kconfig options recently.

Alex


From: amd-gfx  on behalf of Eryk Brol 

Sent: Friday, December 4, 2020 4:28 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Liu, Charlene ; Brol, Eryk ; Li, 
Sun peng (Leo) ; Wentland, Harry ; 
Zhuo, Qingqing ; Siqueira, Rodrigo 
; Pillai, Aurabindo ; Sun, 
Yongqiang ; Lakha, Bhawanpreet 
; R, Bindu 
Subject: [PATCH 14/17] drm/amd/display: Enable gpu_vm_support for dcn3.01

From: Charlene Liu 

[Why]
dcn3_01 supports gpu_vm, but this is not enabled in amdgpu_dm

Signed-off-by: Charlene Liu 
Reviewed-by: Yongqiang Sun 
Acked-by: Eryk Brol 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 59f738008734..53a7cb21f603 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1035,6 +1035,11 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
 init_data.flags.disable_dmcu = true;
 break;
+#if defined(CONFIG_DRM_AMD_DC_DCN3_01)
+   case CHIP_VANGOGH:
+   init_data.flags.gpu_vm_support = true;
+   break;
+#endif
 default:
 break;
 }
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 2/2] drm/amd/pm: correct power limit setting for SMU V11

2020-12-07 Thread Lazar, Lijo
[AMD Public Use]

>* BIT 24-31: ControllerId (hardcoded as PPT0)

May be reworded as 'only PPT0 is supported'. Apart from that

Series is Reviewed-by: Lijo Lazar  

-Original Message-
From: amd-gfx  On Behalf Of Evan Quan
Sent: Monday, December 7, 2020 2:53 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Quan, Evan 

Subject: [PATCH 2/2] drm/amd/pm: correct power limit setting for SMU V11

[CAUTION: External Email]

Correct the power limit setting for SMU V11 asics.

Change-Id: Idedc590c35934397bd77b7ac825b063cd319dbbf
Signed-off-by: Evan Quan 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 624065d3c079..3288760a5431 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -929,9 +929,13 @@ int smu_v11_0_get_current_power_limit(struct smu_context 
*smu,
if (power_src < 0)
return -EINVAL;

+   /*
+* BIT 24-31: ControllerId (hardcoded as PPT0)
+* BIT 16-23: PowerSource
+*/
ret = smu_cmn_send_smc_msg_with_param(smu,
  SMU_MSG_GetPptLimit,
- power_src << 16,
+ (0 << 24) | (power_src << 16),
  power_limit);
if (ret)
dev_err(smu->adev->dev, "[%s] get PPT limit failed!", 
__func__); @@ -941,6 +945,7 @@ int smu_v11_0_get_current_power_limit(struct 
smu_context *smu,

 int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)  {
+   int power_src;
int ret = 0;

if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { @@ -948,6 
+953,22 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
return -EOPNOTSUPP;
}

+   power_src = smu_cmn_to_asic_specific_index(smu,
+   CMN2ASIC_MAPPING_PWR,
+   smu->adev->pm.ac_power ?
+   SMU_POWER_SOURCE_AC :
+   SMU_POWER_SOURCE_DC);
+   if (power_src < 0)
+   return -EINVAL;
+
+   /*
+* BIT 24-31: ControllerId (hardcoded as PPT0)
+* BIT 16-23: PowerSource
+* BIT 0-15: PowerLimit
+*/
+   n &= 0x;
+   n |= 0 << 24;
+   n |= (power_src) << 16;
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, n, 
NULL);
if (ret) {
dev_err(smu->adev->dev, "[%s] Set power limit Failed!\n", 
__func__);
--
2.29.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu/pm: add smc v2_1 printer in amdgpu_ucode_print_smc_hdr()

2020-12-07 Thread Lazar, Lijo
[AMD Public Use]

Reviewed-by: Lijo Lazar 

-Original Message-
From: amd-gfx  On Behalf Of Kevin Wang
Sent: Monday, December 7, 2020 12:12 PM
To: amd-gfx@lists.freedesktop.org
Cc: Wang, Kevin(Yang) 
Subject: [PATCH] drm/amdgpu/pm: add smc v2_1 printer in 
amdgpu_ucode_print_smc_hdr()

[CAUTION: External Email]

the smc v2_0 printer is not compatible with smc v2_1.
1. add an smc v2_1 printer.
2. clean up the code

Signed-off-by: Kevin Wang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 29 +++
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 2b7c90b7a712..1beb08af347f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -68,23 +68,32 @@ void amdgpu_ucode_print_smc_hdr(const struct 
common_firmware_header *hdr)  {
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+   const struct smc_firmware_header_v1_0 *v1_0_hdr;
+   const struct smc_firmware_header_v2_0 *v2_0_hdr;
+   const struct smc_firmware_header_v2_1 *v2_1_hdr;

DRM_DEBUG("SMC\n");
amdgpu_ucode_print_common_hdr(hdr);

if (version_major == 1) {
-   const struct smc_firmware_header_v1_0 *smc_hdr =
-   container_of(hdr, struct smc_firmware_header_v1_0, 
header);
-
-   DRM_DEBUG("ucode_start_addr: %u\n", 
le32_to_cpu(smc_hdr->ucode_start_addr));
+   v1_0_hdr = container_of(hdr, struct smc_firmware_header_v1_0, 
header);
+   DRM_DEBUG("ucode_start_addr: %u\n", 
+ le32_to_cpu(v1_0_hdr->ucode_start_addr));
} else if (version_major == 2) {
-   const struct smc_firmware_header_v1_0 *v1_hdr =
-   container_of(hdr, struct smc_firmware_header_v1_0, 
header);
-   const struct smc_firmware_header_v2_0 *v2_hdr =
-   container_of(v1_hdr, struct smc_firmware_header_v2_0, 
v1_0);
+   switch (version_minor) {
+   case 0:
+   v2_0_hdr = container_of(hdr, struct 
smc_firmware_header_v2_0, v1_0.header);
+   DRM_DEBUG("ppt_offset_bytes: %u\n", 
le32_to_cpu(v2_0_hdr->ppt_offset_bytes));
+   DRM_DEBUG("ppt_size_bytes: %u\n", 
le32_to_cpu(v2_0_hdr->ppt_size_bytes));
+   break;
+   case 1:
+   v2_1_hdr = container_of(hdr, struct 
smc_firmware_header_v2_1, v1_0.header);
+   DRM_DEBUG("pptable_count: %u\n", 
le32_to_cpu(v2_1_hdr->pptable_count));
+   DRM_DEBUG("pptable_entry_offset: %u\n", 
le32_to_cpu(v2_1_hdr->pptable_entry_offset));
+   break;
+   default:
+   break;
+   }

-   DRM_DEBUG("ppt_offset_bytes: %u\n", 
le32_to_cpu(v2_hdr->ppt_offset_bytes));
-   DRM_DEBUG("ppt_size_bytes: %u\n", 
le32_to_cpu(v2_hdr->ppt_size_bytes));
} else {
DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, 
version_minor);
}
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/5] drm/scheduler: Job timeout handler returns status (v2)

2020-12-07 Thread Christian König

On 12/4/20 4:10 PM, Andrey Grodzovsky wrote:


On 12/4/20 3:13 AM, Christian König wrote:
Thinking more about that I came to the conclusion that the whole 
approach here isn't correct.


See, even when the job has been completed or canceled, we still want to
restart the timer.


The reason for this is that the timer is then not restarted for the 
current job, but for the next job in the queue.


The only valid reason to not restart the timer is that the whole 
device was hot plugged and we return -ENODEV here. E.g. what Andrey 
has been working on.



We discussed this with Luben offline a few days ago but came to the
conclusion that for the next job the timer restart in
drm_sched_job_begin should do the work, no?


Nope, drm_sched_job_begin() pushes the job to the hardware and starts 
the timeout in case the hardware was idle before.


The function should probably be renamed to drm_sched_job_pushed() 
because it doesn't begin the execution in any way.


Christian.



Andrey




Regards,
Christian.

On 12/4/20 4:17 AM, Luben Tuikov wrote:

The driver's job timeout handler now returns
status indicating back to the DRM layer whether
the task (job) was successfully aborted or whether
more time should be given to the task to complete.

Default behaviour, as of this patch, is preserved,
except in the obvious-by-comment case in the Panfrost
driver, as documented below.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been updated accordingly and return the
would've-been default value of
DRM_TASK_STATUS_ALIVE to restart the task's
timeout timer--this is the old behaviour, and
is preserved by this patch.

In the case of the Panfrost driver, its timedout
callback correctly first checks if the job had
completed in due time and if so, it now returns
DRM_TASK_STATUS_COMPLETE to notify the DRM layer
that the task can be moved to the done list, to be
freed later. In the other two subsequent checks,
the value of DRM_TASK_STATUS_ALIVE is returned, as
per the default behaviour.

More involved driver solutions can be had
in subsequent patches.

Signed-off-by: Luben Tuikov 
Reported-by: kernel test robot 

Cc: Alexander Deucher 
Cc: Andrey Grodzovsky 
Cc: Christian König 
Cc: Daniel Vetter 
Cc: Lucas Stach 
Cc: Russell King 
Cc: Christian Gmeiner 
Cc: Qiang Yu 
Cc: Rob Herring 
Cc: Tomeu Vizoso 
Cc: Steven Price 
Cc: Alyssa Rosenzweig 
Cc: Eric Anholt 

v2: Use enum as the status of a driver's job
 timeout callback method.
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 +++--
  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 10 +++-
  drivers/gpu/drm/lima/lima_sched.c   |  4 +++-
  drivers/gpu/drm/panfrost/panfrost_job.c |  9 ---
  drivers/gpu/drm/scheduler/sched_main.c  |  4 +---
  drivers/gpu/drm/v3d/v3d_sched.c | 32 
+

  include/drm/gpu_scheduler.h | 20 +---
  7 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ff48101bab55..a111326cbdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static enum drm_task_status amdgpu_job_timedout(struct 
drm_sched_job *s_job)

  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  amdgpu_ring_soft_recovery(ring, job->vmid, 
s_job->s_fence->parent)) {

  DRM_ERROR("ring %s timeout, but soft recovered\n",
    s_job->sched->name);
-    return;
+    return DRM_TASK_STATUS_ALIVE;
  }
    amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct 
drm_sched_job *s_job)

    if (amdgpu_device_should_recover_gpu(ring->adev)) {
  amdgpu_device_gpu_recover(ring->adev, job);
+    return DRM_TASK_STATUS_ALIVE;
  } else {
  drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))
  adev->virt.tdr_debug = true;
+    return DRM_TASK_STATUS_ALIVE;
  }
  }
  diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c

index cd46c882269c..c49516942328 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -82,7 +82,8 @@ static struct dma_fence 
*etnaviv_sched_run_job(struct drm_sched_job *sched_job)

  return fence;
  }
  -static void etnaviv_sched_timedout_job(struct drm_sched_job 
*sched_job)
+static enum drm_task_status etnaviv_sched_timedout_job(struct 
drm_sched_job

+   *sched_job)
  {
  struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
  struct etnaviv_gpu *gpu = 

[PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

2020-12-07 Thread jianzh
From: Jiange Zhao 

In the virtualization case, when one VF is sending too many
FLR requests, the hypervisor would stop responding to this
VF's requests for a long period of time. This is called
event guard. During this cooling period, the guest
driver should wait instead of doing other things. After
this period of time, the guest driver would resume the reset
process and return to normal.

Currently, the guest driver waits 12 seconds and returns failure
if it doesn't get a response from the host.

Solution: extend this waiting time in the guest driver and poll
for the response periodically. Polling happens every 6 seconds and
lasts for up to 60 seconds.
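
[Aside, on the arithmetic: the macros below bound the worst case at
AI_MAILBOX_POLL_MSG_REP_MAX * AI_MAILBOX_POLL_MSG_TIMEDOUT
= 11 * 6000 ms = 66 s, slightly over the 60 seconds stated above; see
Hawking's review comment earlier in this digest.]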

v2: change the max repetition times from number to macro.

Signed-off-by: Jiange Zhao 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h |  1 +
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f5ce9a9f4cf5..7767ccca526b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -187,7 +187,16 @@ static int xgpu_ai_send_access_requests(struct 
amdgpu_device *adev,
 
 static int xgpu_ai_request_reset(struct amdgpu_device *adev)
 {
-   return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < AI_MAILBOX_POLL_MSG_REP_MAX) {
+   ret = xgpu_ai_send_access_requests(adev, 
IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }
 
 static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 83b453f5d717..50572635d0f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -25,8 +25,9 @@
 #define __MXGPU_AI_H__
 
 #define AI_MAILBOX_POLL_ACK_TIMEDOUT   500
-#define AI_MAILBOX_POLL_MSG_TIMEDOUT   12000
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define AI_MAILBOX_POLL_FLR_TIMEDOUT   5000
+#define AI_MAILBOX_POLL_MSG_REP_MAX11
 
 enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 666ed99cc14b..dd5c1e6ce009 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -200,7 +200,16 @@ static int xgpu_nv_send_access_requests(struct 
amdgpu_device *adev,
 
 static int xgpu_nv_request_reset(struct amdgpu_device *adev)
 {
-   return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < NV_MAILBOX_POLL_MSG_REP_MAX) {
+   ret = xgpu_nv_send_access_requests(adev, 
IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }
 
 static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 52605e14a1a5..9f5808616174 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -27,6 +27,7 @@
 #define NV_MAILBOX_POLL_ACK_TIMEDOUT   500
 #define NV_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define NV_MAILBOX_POLL_FLR_TIMEDOUT   5000
+#define NV_MAILBOX_POLL_MSG_REP_MAX11
 
 enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amd/pm: correct power limit setting for SMU V11

2020-12-07 Thread Evan Quan
Correct the power limit setting for SMU V11 asics.

Change-Id: Idedc590c35934397bd77b7ac825b063cd319dbbf
Signed-off-by: Evan Quan 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 624065d3c079..3288760a5431 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -929,9 +929,13 @@ int smu_v11_0_get_current_power_limit(struct smu_context 
*smu,
if (power_src < 0)
return -EINVAL;
 
+   /*
+* BIT 24-31: ControllerId (hardcoded as PPT0)
+* BIT 16-23: PowerSource
+*/
ret = smu_cmn_send_smc_msg_with_param(smu,
  SMU_MSG_GetPptLimit,
- power_src << 16,
+ (0 << 24) | (power_src << 16),
  power_limit);
if (ret)
dev_err(smu->adev->dev, "[%s] get PPT limit failed!", __func__);
@@ -941,6 +945,7 @@ int smu_v11_0_get_current_power_limit(struct smu_context 
*smu,
 
 int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
 {
+   int power_src;
int ret = 0;
 
if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
@@ -948,6 +953,22 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, 
uint32_t n)
return -EOPNOTSUPP;
}
 
+   power_src = smu_cmn_to_asic_specific_index(smu,
+   CMN2ASIC_MAPPING_PWR,
+   smu->adev->pm.ac_power ?
+   SMU_POWER_SOURCE_AC :
+   SMU_POWER_SOURCE_DC);
+   if (power_src < 0)
+   return -EINVAL;
+
+   /*
+* BIT 24-31: ControllerId (hardcoded as PPT0)
+* BIT 16-23: PowerSource
+* BIT 0-15: PowerLimit
+*/
+   n &= 0x;
+   n |= 0 << 24;
+   n |= (power_src) << 16;
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, n, 
NULL);
if (ret) {
dev_err(smu->adev->dev, "[%s] Set power limit Failed!\n", 
__func__);
-- 
2.29.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amd/pm: support power source switch on Sienna Cichlid

2020-12-07 Thread Evan Quan
Enable power source switch on Sienna Cichlid.

Change-Id: Ic48821c61f3f1f65f7f5d78bac28e11ce400b4b2
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 84251f4826e6..822ddfc42360 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -354,6 +354,9 @@ static int sienna_cichlid_check_powerplay_table(struct 
smu_context *smu)
struct smu_11_0_7_overdrive_table *overdrive_table;
int i, min_power_mode, max_power_mode;
 
+   if (powerplay_table->platform_caps & 
SMU_11_0_7_PP_PLATFORM_CAP_HARDWAREDC)
+   smu->dc_controlled_by_gpio = true;
+
if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO ||
powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO)
smu_baco->platform_support = true;
@@ -3296,6 +3299,7 @@ static const struct pptable_funcs 
sienna_cichlid_ppt_funcs = {
.set_default_od_settings = sienna_cichlid_set_default_od_settings,
.od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table,
.run_btc = sienna_cichlid_run_btc,
+   .set_power_source = smu_v11_0_set_power_source,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
.get_gpu_metrics = sienna_cichlid_get_gpu_metrics,
-- 
2.29.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

2020-12-07 Thread Zhang, Hawking
[AMD Public Use]

+   while (i < 11)

Please use a macro to replace the magic number. No one wants to play with code
that needs some sort of guesswork to figure out hacky or experimental settings.

Regards,
Hawking
From: amd-gfx  On Behalf Of Zhao, Jiange
Sent: Monday, December 7, 2020 16:15
To: Zhao, Jiange ; amd-gfx@lists.freedesktop.org
Cc: Zhang, Andy ; Chen, Horace ; Min, 
Frank 
Subject: Re: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period


[AMD Official Use Only - Internal Distribution Only]


[AMD Official Use Only - Internal Distribution Only]

ping!

From: Zhao, Jiange <jia...@amd.com>
Sent: Wednesday, November 25, 2020 10:10 PM
To: amd-gfx@lists.freedesktop.org
Cc: Min, Frank <frank@amd.com>; Chen, Horace <horace.c...@amd.com>; Zhang, Andy <andy.zh...@amd.com>; Zhao, Jiange <jiange.z...@amd.com>
Subject: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

From: Jiange Zhao <jiange.z...@amd.com>

In the virtualization case, when one VF sends too many
FLR requests, the hypervisor stops responding to that
VF's requests for a long period of time. This is called
an event guard. During this cooling-off period, the guest
driver should wait instead of doing anything else; once
it is over, the guest driver can resume the reset process
and return to normal.

Currently, the guest driver waits 12 seconds and returns
failure if it gets no response from the host.

Solution: extend the waiting time in the guest driver and
poll for a response periodically. With the per-message
timeout reduced from 12000 ms to 6000 ms and up to 11
request attempts, the guest now waits roughly 66 seconds
in total instead of failing after a single 12-second wait.

Signed-off-by: Jiange Zhao <jiange.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ++++++++++-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f5ce9a9f4cf5..d8d8c623bb74 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -187,7 +187,16 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,

 static int xgpu_ai_request_reset(struct amdgpu_device *adev)
 {
-   return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < 11) {
+   ret = xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }

 static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 83b453f5d717..20ee2142f9ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -25,7 +25,7 @@
 #define __MXGPU_AI_H__

 #define AI_MAILBOX_POLL_ACK_TIMEDOUT500
-#define AI_MAILBOX_POLL_MSG_TIMEDOUT   12000
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define AI_MAILBOX_POLL_FLR_TIMEDOUT5000

 enum idh_request {
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 666ed99cc14b..0147dfe21a39 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -200,7 +200,16 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,

 static int xgpu_nv_request_reset(struct amdgpu_device *adev)
 {
-   return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < 11) {
+   ret = xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }

 static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
--
2.25.1


Re: [PATCH] drm/amdgpu: change trace event parameter name from 'driect' to 'immediate'

2020-12-07 Thread Wang, Kevin(Yang)
[AMD Official Use Only - Internal Distribution Only]



From: Koenig, Christian 
Sent: Monday, December 7, 2020 4:23 PM
To: Wang, Kevin(Yang) ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: change trace event parameter name from 'driect' to 'immediate'

On 07.12.20 at 07:56, Kevin Wang wrote:
> s/direct/immediate/g
>
> The amdgpu VM code renamed the parameter from 'direct' to 'immediate';
> however, the trace events were not updated yet.
>
> Signed-off-by: Kevin Wang 
There is a small typo in the subject line ("driect"); apart from that, the
patch is Reviewed-by: Christian König .

Thanks for removing one item from my TODO list :)

Regards,
Christian.

[kevin]:
ok, thank you for reminding me.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 24 ++++++++++++------------
>   1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index ee9480d14cbc..324d5e3f3579 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -366,15 +366,15 @@ TRACE_EVENT(amdgpu_vm_update_ptes,
>
>   TRACE_EVENT(amdgpu_vm_set_ptes,
>TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
> -  uint32_t incr, uint64_t flags, bool direct),
> - TP_ARGS(pe, addr, count, incr, flags, direct),
> +  uint32_t incr, uint64_t flags, bool immediate),
> + TP_ARGS(pe, addr, count, incr, flags, immediate),
>TP_STRUCT__entry(
> __field(u64, pe)
> __field(u64, addr)
> __field(u32, count)
> __field(u32, incr)
> __field(u64, flags)
> -  __field(bool, direct)
> +  __field(bool, immediate)
> ),
>
>TP_fast_assign(
> @@ -383,32 +383,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes,
>   __entry->count = count;
>   __entry->incr = incr;
>   __entry->flags = flags;
> -__entry->direct = direct;
> +__entry->immediate = immediate;
>   ),
>TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
> -   "direct=%d", __entry->pe, __entry->addr, __entry->incr,
> -   __entry->flags, __entry->count, __entry->direct)
> +   "immediate=%d", __entry->pe, __entry->addr, __entry->incr,
> +   __entry->flags, __entry->count, __entry->immediate)
>   );
>
>   TRACE_EVENT(amdgpu_vm_copy_ptes,
> - TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct),
> - TP_ARGS(pe, src, count, direct),
> + TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool immediate),
> + TP_ARGS(pe, src, count, immediate),
>TP_STRUCT__entry(
> __field(u64, pe)
> __field(u64, src)
> __field(u32, count)
> -  __field(bool, direct)
> +  __field(bool, immediate)
> ),
>
>TP_fast_assign(
>   __entry->pe = pe;
>   __entry->src = src;
>   __entry->count = count;
> -__entry->direct = direct;
> +__entry->immediate = immediate;
>   ),
> - TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d",
> + TP_printk("pe=%010Lx, src=%010Lx, count=%u, immediate=%d",
>  __entry->pe, __entry->src, __entry->count,
> -   __entry->direct)
> +   __entry->immediate)
>   );
>
>   TRACE_EVENT(amdgpu_vm_flush,
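
For illustration, after this rename a trace line for these events would read, per the TP_printk format above (sample values only):

    amdgpu_vm_copy_ptes: pe=0000001000, src=0000002000, count=8, immediate=1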



Re: [PATCH] drm/amdgpu: change trace event parameter name from 'driect' to 'immediate'

2020-12-07 Thread Christian König

On 07.12.20 at 07:56, Kevin Wang wrote:

s/direct/immediate/g

The amdgpu VM code renamed the parameter from 'direct' to 'immediate';
however, the trace events were not updated yet.

Signed-off-by: Kevin Wang 
There is a small typo in the subject line ("driect"); apart from that, the
patch is Reviewed-by: Christian König .


Thanks for removing one item from my TODO list :)

Regards,
Christian.


---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index ee9480d14cbc..324d5e3f3579 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -366,15 +366,15 @@ TRACE_EVENT(amdgpu_vm_update_ptes,
 
 TRACE_EVENT(amdgpu_vm_set_ptes,
 	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
-		     uint32_t incr, uint64_t flags, bool direct),
-	    TP_ARGS(pe, addr, count, incr, flags, direct),
+		     uint32_t incr, uint64_t flags, bool immediate),
+	    TP_ARGS(pe, addr, count, incr, flags, immediate),
 	    TP_STRUCT__entry(
 			     __field(u64, pe)
 			     __field(u64, addr)
 			     __field(u32, count)
 			     __field(u32, incr)
 			     __field(u64, flags)
-			     __field(bool, direct)
+			     __field(bool, immediate)
 			     ),
 
 	    TP_fast_assign(
@@ -383,32 +383,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes,
 			   __entry->count = count;
 			   __entry->incr = incr;
 			   __entry->flags = flags;
-			   __entry->direct = direct;
+			   __entry->immediate = immediate;
 			   ),
 	    TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
-		      "direct=%d", __entry->pe, __entry->addr, __entry->incr,
-		      __entry->flags, __entry->count, __entry->direct)
+		      "immediate=%d", __entry->pe, __entry->addr, __entry->incr,
+		      __entry->flags, __entry->count, __entry->immediate)
 );
 
 TRACE_EVENT(amdgpu_vm_copy_ptes,
-	    TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct),
-	    TP_ARGS(pe, src, count, direct),
+	    TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool immediate),
+	    TP_ARGS(pe, src, count, immediate),
 	    TP_STRUCT__entry(
 			     __field(u64, pe)
 			     __field(u64, src)
 			     __field(u32, count)
-			     __field(bool, direct)
+			     __field(bool, immediate)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->pe = pe;
 			   __entry->src = src;
 			   __entry->count = count;
-			   __entry->direct = direct;
+			   __entry->immediate = immediate;
 			   ),
-	    TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d",
+	    TP_printk("pe=%010Lx, src=%010Lx, count=%u, immediate=%d",
 		      __entry->pe, __entry->src, __entry->count,
-		      __entry->direct)
+		      __entry->immediate)
 );
 
 TRACE_EVENT(amdgpu_vm_flush,




Re: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

2020-12-07 Thread Zhao, Jiange
[AMD Official Use Only - Internal Distribution Only]

ping!

From: Zhao, Jiange 
Sent: Wednesday, November 25, 2020 10:10 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Min, Frank ; Chen, Horace ; Zhang, Andy ; Zhao, Jiange 
Subject: [PATCH] drm/amdgpu/SRIOV: Extend VF reset request wait period

From: Jiange Zhao 

In the virtualization case, when one VF sends too many
FLR requests, the hypervisor stops responding to that
VF's requests for a long period of time. This is called
an event guard. During this cooling-off period, the guest
driver should wait instead of doing anything else; once
it is over, the guest driver can resume the reset process
and return to normal.

Currently, the guest driver waits 12 seconds and returns
failure if it gets no response from the host.

Solution: extend the waiting time in the guest driver and
poll for a response periodically. With the per-message
timeout reduced from 12000 ms to 6000 ms and up to 11
request attempts, the guest now waits roughly 66 seconds
in total instead of failing after a single 12-second wait.

Signed-off-by: Jiange Zhao 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ++++++++++-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f5ce9a9f4cf5..d8d8c623bb74 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -187,7 +187,16 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,

 static int xgpu_ai_request_reset(struct amdgpu_device *adev)
 {
-   return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < 11) {
+   ret = xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }

 static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 83b453f5d717..20ee2142f9ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -25,7 +25,7 @@
 #define __MXGPU_AI_H__

 #define AI_MAILBOX_POLL_ACK_TIMEDOUT500
-#define AI_MAILBOX_POLL_MSG_TIMEDOUT   12000
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define AI_MAILBOX_POLL_FLR_TIMEDOUT5000

 enum idh_request {
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 666ed99cc14b..0147dfe21a39 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -200,7 +200,16 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,

 static int xgpu_nv_request_reset(struct amdgpu_device *adev)
 {
-   return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   int ret, i = 0;
+
+   while (i < 11) {
+   ret = xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+   if (!ret)
+   break;
+   i++;
+   }
+
+   return ret;
 }

 static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
--
2.25.1
