[PATCH] drm/amdgpu: Add msix restore for pass-through mode

2021-07-22 Thread Chengzhe Liu
In pass-through mode, after a mode 1 reset, the MSI-X enablement status
is lost and the device never receives interrupts again. So we should
restore the MSI-X status after a mode 1 reset.

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 83af307e97cd..e1aa4a5e6a98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -584,7 +584,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct 
amdgpu_device *adev)
 {
int i, j, k;
 
-   if (amdgpu_sriov_vf(adev))
+   if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
amdgpu_restore_msix(adev);
 
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Clear doorbell interrupt status for Sienna Cichlid

2021-07-21 Thread Chengzhe Liu
On Sienna Cichlid, in pass-through mode, if we unload the driver in BACO
mode (RTPM), the kernel receives thousands of interrupts.
That's because there is a doorbell monitor interrupt on BIF, so KVM keeps
injecting interrupts into the guest VM. So we should clear the doorbell
interrupt status after BACO exit.

v2: Modify coding style and commit message

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 21 +
 3 files changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 37fa199be8b3..92f73d2bbfc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5265,6 +5265,10 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
 
+   if (amdgpu_passthrough(adev) &&
+   adev->nbio.funcs->clear_doorbell_interrupt)
+   adev->nbio.funcs->clear_doorbell_interrupt(adev);
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 45295dce5c3e..843052205bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -95,6 +95,7 @@ struct amdgpu_nbio_funcs {
void (*program_aspm)(struct amdgpu_device *adev);
void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
+   void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 7b79eeaa88aa..b184b656b9b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -508,6 +508,26 @@ static void 
nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev
WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data);
 }
 
+static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
+{
+   uint32_t reg, reg_data;
+
+   if (adev->asic_type != CHIP_SIENNA_CICHLID)
+   return;
+
+   reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);
+
+   /* Clear Interrupt Status
+*/
+   if ((reg & BIF_RB_CNTL__RB_ENABLE_MASK) == 0) {
+   reg = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+   if (reg & 
BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_STATUS_MASK) {
+   reg_data = 1 << 
BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_CLEAR__SHIFT;
+   WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, 
reg_data);
+   }
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -531,4 +551,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.program_aspm =  nbio_v2_3_program_aspm,
.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
.apply_l1_link_width_reconfig_wa = 
nbio_v2_3_apply_l1_link_width_reconfig_wa,
+   .clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
 };
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Clear doorbell interrupt status after BACO exit for SIENNA_CICHLID

2021-07-20 Thread Chengzhe Liu
In passthrough mode, if we unload the driver in BACO mode (RTPM), the
kernel receives thousands of unhandled interrupts. That's because there
is a doorbell monitor interrupt on BIF, so KVM keeps injecting
interrupts into the guest VM. So we should clear the doorbell interrupt
status after BACO exit.

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 20 
 3 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 37fa199be8b3..109a76ca4535 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5265,6 +5265,9 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
 
+   if (amdgpu_passthrough(adev) && 
adev->nbio.funcs->clear_doorbell_interrupt)
+   adev->nbio.funcs->clear_doorbell_interrupt(adev);
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 45295dce5c3e..843052205bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -95,6 +95,7 @@ struct amdgpu_nbio_funcs {
void (*program_aspm)(struct amdgpu_device *adev);
void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
+   void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 7b79eeaa88aa..044dc63d2e86 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -508,6 +508,25 @@ static void 
nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev
WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data);
 }
 
+static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
+{
+   uint32_t reg, reg_data;
+
+   if (adev->asic_type != CHIP_SIENNA_CICHLID)
+   return;
+
+   reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);
+
+   /*Clear Interrupt Status*/
+   if ((reg & BIF_RB_CNTL__RB_ENABLE_MASK) == 0) {
+   reg = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+   if (reg & 
BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_STATUS_MASK) {
+   reg_data = 1 << 
BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_CLEAR__SHIFT;
+   WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, 
reg_data);
+   }
+   }
+}
+
 const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -531,4 +550,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.program_aspm =  nbio_v2_3_program_aspm,
.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
.apply_l1_link_width_reconfig_wa = 
nbio_v2_3_apply_l1_link_width_reconfig_wa,
+   .clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
 };
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Power down VCN and JPEG before disabling SMU features

2021-06-18 Thread Chengzhe Liu
When unloading the driver, if VCN is powered on, sending the
DisableAllSmuFeatures message to the SMU will cause the SMU to hang. We
need to power down VCN and JPEG before cleaning up the SMU.

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index cb375f1beebd..ebe672142808 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1453,10 +1453,14 @@ static int smu_hw_fini(void *handle)
 
if (smu->is_apu) {
smu_powergate_sdma(>smu, true);
-   smu_dpm_set_vcn_enable(smu, false);
-   smu_dpm_set_jpeg_enable(smu, false);
}
 
+   smu_dpm_set_vcn_enable(smu, false);
+   smu_dpm_set_jpeg_enable(smu, false);
+
+   adev->vcn.cur_state = AMD_PG_STATE_GATE;
+   adev->jpeg.cur_state = AMD_PG_STATE_GATE;
+
if (!smu->pm_enabled)
return 0;
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Increase tlb flush timeout for sriov

2021-05-19 Thread Chengzhe Liu
When there are 12 VFs, we need to increase the TLB flush timeout.

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f02dc904e4cf..a5f005c5d0ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -404,6 +404,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+   uint32_t sriov_usec_timeout = 600;  /* wait for 12 * 500ms for 
SRIOV */
struct amdgpu_ring *ring = >gfx.kiq.ring;
struct amdgpu_kiq *kiq = >gfx.kiq;
 
@@ -422,7 +423,10 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
 
amdgpu_ring_commit(ring);
spin_unlock(>gfx.kiq.ring_lock);
-   r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+   if (amdgpu_sriov_vf(adev))
+   r = amdgpu_fence_wait_polling(ring, seq, 
sriov_usec_timeout);
+   else
+   r = amdgpu_fence_wait_polling(ring, seq, 
adev->usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", 
r);
return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ceb3968d8326..e4a18d8f75c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -857,6 +857,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+   uint32_t sriov_usec_timeout = 600;  /* wait for 12 * 500ms for 
SRIOV */
struct amdgpu_ring *ring = >gfx.kiq.ring;
struct amdgpu_kiq *kiq = >gfx.kiq;
 
@@ -896,7 +897,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct 
amdgpu_device *adev,
 
amdgpu_ring_commit(ring);
spin_unlock(>gfx.kiq.ring_lock);
-   r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+   if (amdgpu_sriov_vf(adev))
+   r = amdgpu_fence_wait_polling(ring, seq, 
sriov_usec_timeout);
+   else
+   r = amdgpu_fence_wait_polling(ring, seq, 
adev->usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", 
r);
up_read(>reset_sem);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: Disable cwsr for vega10 and Sienna_Cichlid in sriov

2021-05-19 Thread Chengzhe Liu
In SR-IOV, CWSR is not stable, so disable it for Vega10 and
Sienna Cichlid.

Signed-off-by: Chengzhe Liu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 80015e866498..89bd0059329b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -335,7 +335,7 @@ static const struct kfd_device_info vega10_vf_device_info = 
{
.event_interrupt_class = _interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
-   .supports_cwsr = true,
+   .supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
@@ -505,6 +505,24 @@ static const struct kfd_device_info 
sienna_cichlid_device_info = {
.num_sdma_queues_per_engine = 8,
 };
 
+static const struct kfd_device_info sienna_cichlid_vf_device_info = {
+   .asic_family = CHIP_SIENNA_CICHLID,
+   .asic_name = "sienna_cichlid",
+   .max_pasid_bits = 16,
+   .max_no_of_hqd  = 24,
+   .doorbell_size  = 8,
+   .ih_ring_entry_size = 8 * sizeof(uint32_t),
+   .event_interrupt_class = _interrupt_class_v10,
+   .num_of_watch_points = 4,
+   .mqd_size_aligned = MQD_SIZE_ALIGNED,
+   .needs_iommu_device = false,
+   .supports_cwsr = false,
+   .needs_pci_atomics = true,
+   .num_sdma_engines = 4,
+   .num_xgmi_sdma_engines = 0,
+   .num_sdma_queues_per_engine = 8,
+};
+
 static const struct kfd_device_info navy_flounder_device_info = {
.asic_family = CHIP_NAVY_FLOUNDER,
.asic_name = "navy_flounder",
@@ -601,7 +619,7 @@ static const struct kfd_device_info 
*kfd_supported_devices[][2] = {
[CHIP_NAVI10] = {_device_info, NULL},
[CHIP_NAVI12] = {_device_info, _device_info},
[CHIP_NAVI14] = {_device_info, NULL},
-   [CHIP_SIENNA_CICHLID] = {_cichlid_device_info, 
_cichlid_device_info},
+   [CHIP_SIENNA_CICHLID] = {_cichlid_device_info, 
_cichlid_vf_device_info},
[CHIP_NAVY_FLOUNDER] = {_flounder_device_info, 
_flounder_device_info},
[CHIP_VANGOGH] = {_device_info, NULL},
[CHIP_DIMGREY_CAVEFISH] = {_cavefish_device_info, 
_cavefish_device_info},
@@ -674,7 +692,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 
 static void kfd_cwsr_init(struct kfd_dev *kfd)
 {
-   if (cwsr_enable && kfd->device_info->supports_cwsr) {
+   if ((cwsr_enable && kfd->device_info->supports_cwsr) || cwsr_enable == 
2) {
if (kfd->device_info->asic_family < CHIP_VEGA10) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx