[PATCH 4/8] drm/amdgpu: invalidate only the currently needed VMHUB v2

Date: 2017-04-11
From: Christian König 

Drop invalidating both hubs from each engine; each ring now flushes only the VMHUB it actually uses.

v2: don't use hardcoded values

Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
Reviewed-by: Andres Rodriguez 
---
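
A note for readers following the series: the functional change is simply
replacing a loop over all VMHUBs with a lookup of the single hub a ring is
wired to. Below is a minimal, self-contained sketch of that control-flow
change; the ring/hub types and names are mocked up for illustration and are
not the driver's real structures.

#include <stdio.h>

/* Two hubs as on vega10; indices stand in for AMDGPU_GFXHUB/AMDGPU_MMHUB. */
enum { GFXHUB = 0, MMHUB = 1, MAX_VMHUBS = 2 };

struct ring {
	const char *name;
	unsigned vmhub;		/* which hub this engine translates through */
};

/* Old behaviour: every engine invalidated every hub. */
static void flush_all_hubs(const struct ring *ring)
{
	for (unsigned i = 0; i < MAX_VMHUBS; ++i)
		printf("%s: invalidate hub %u\n", ring->name, i);
}

/* New behaviour: invalidate only the hub the ring actually uses. */
static void flush_own_hub(const struct ring *ring)
{
	printf("%s: invalidate hub %u\n", ring->name, ring->vmhub);
}

int main(void)
{
	struct ring gfx = { "gfx", GFXHUB };
	struct ring uvd = { "uvd", MMHUB };

	flush_all_hubs(&gfx);	/* old: two invalidations per flush */
	flush_own_hub(&gfx);	/* new: one */
	flush_own_hub(&uvd);
	return 0;
}
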
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  36 +--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  60 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 111 +++--
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  57 -
 5 files changed, 119 insertions(+), 148 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index db47c51..d6a6873 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -406,8 +406,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  struct amdgpu_job *job)
 {
struct amdgpu_device *adev = ring->adev;
-   /* Temporary use only the first VM manager */
-   unsigned vmhub = 0; /*ring->funcs->vmhub;*/
+   unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
struct fence *updates = sync->last_vm_update;
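
This hunk makes amdgpu_vm_grab_id() allocate from the ID manager of the
ring's own hub instead of always the first one, so GFXHUB and MMHUB hand out
VMIDs independently. A rough sketch of that lookup with simplified stand-in
types (only the id_mgr[] array and the vmhub index mirror the driver):

#include <assert.h>
#include <stdio.h>

#define AMDGPU_MAX_VMHUBS 2

struct id_mgr_sketch {
	unsigned next_id;	/* pretend per-hub VMID allocator state */
};

struct vm_manager_sketch {
	struct id_mgr_sketch id_mgr[AMDGPU_MAX_VMHUBS];
};

/* as in the hunk above: pick the manager for the ring's own hub */
static struct id_mgr_sketch *grab_id_mgr(struct vm_manager_sketch *mgr,
					 unsigned ring_vmhub)
{
	assert(ring_vmhub < AMDGPU_MAX_VMHUBS);
	return &mgr->id_mgr[ring_vmhub];
}

int main(void)
{
	static struct vm_manager_sketch mgr;	/* zero-initialized */

	/* a gfx ring (hub 0) and a UVD ring (hub 1) draw from separate pools */
	grab_id_mgr(&mgr, 0)->next_id = 1;
	grab_id_mgr(&mgr, 1)->next_id = 1;
	printf("hub0 next %u, hub1 next %u\n",
	       mgr.id_mgr[0].next_id, mgr.id_mgr[1].next_id);
	return 0;
}
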
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c8bf2c6..55a17ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2956,35 +2956,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
 {
+   struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
unsigned eng = ring->idx;
-   unsigned i;
 
pd_addr = pd_addr | 0x1; /* valid bit */
/* now only use physical base address of PDE and valid */
BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-   for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-   struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-   gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-  hub->ctx0_ptb_addr_lo32
-  + (2 * vm_id),
-  lower_32_bits(pd_addr));
+   gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+  hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
+  lower_32_bits(pd_addr));
 
-   gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-  hub->ctx0_ptb_addr_hi32
-  + (2 * vm_id),
-  upper_32_bits(pd_addr));
+   gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+  hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
+  upper_32_bits(pd_addr));
 
-   gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-  hub->vm_inv_eng0_req + eng, req);
+   gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+  hub->vm_inv_eng0_req + eng, req);
 
-   /* wait for the invalidate to complete */
-   gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
- eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
-   }
+   /* wait for the invalidate to complete */
+   gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
+ eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
 
/* compute doesn't have PFP */
if (usepfp) {
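
Per hub, the flush sequence is three register writes plus one ack poll, all
addressed relative to the hub's register block and the ring's engine slot.
A hedged sketch of just the address arithmetic; the register numbers below
are placeholders, not real vega10 offsets:

#include <stdint.h>
#include <stdio.h>

/* Placeholder bases; in the driver these come from the vmhub setup code. */
struct vmhub_sketch {
	uint32_t ctx0_ptb_addr_lo32;
	uint32_t ctx0_ptb_addr_hi32;
	uint32_t vm_inv_eng0_req;
	uint32_t vm_inv_eng0_ack;
};

static void emit_vm_flush_sketch(const struct vmhub_sketch *hub,
				 unsigned vm_id, unsigned eng,
				 uint64_t pd_addr)
{
	/* the page-table base registers are 64-bit, hence the stride of 2 */
	printf("write 0x%x = 0x%08x\n",
	       hub->ctx0_ptb_addr_lo32 + 2 * vm_id, (uint32_t)pd_addr);
	printf("write 0x%x = 0x%08x\n",
	       hub->ctx0_ptb_addr_hi32 + 2 * vm_id,
	       (uint32_t)(pd_addr >> 32));
	/* each ring owns one invalidation engine, selected by ring->idx */
	printf("write 0x%x = req(vm_id=%u)\n",
	       hub->vm_inv_eng0_req + eng, vm_id);
	/* poll the per-VMID ack bit until the hub reports completion */
	printf("wait  0x%x mask 0x%x\n",
	       hub->vm_inv_eng0_ack + eng, 1u << vm_id);
}

int main(void)
{
	struct vmhub_sketch hub = { 0x100, 0x101, 0x200, 0x240 }; /* made up */

	emit_vm_flush_sketch(&hub, 3, 0, 0x123456789000ULL | 0x1);
	return 0;
}
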
@@ -3463,7 +3457,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 +  /* COND_EXEC */
7 +  /* PIPELINE_SYNC */
-   46 + /* VM_FLUSH */
+   24 + /* VM_FLUSH */
8 +  /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -3510,7 +3504,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7 + /* gfx_v9_0_ring_emit_hdp_flush */
5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-   64 + /* gfx_v9_0_ring_emit_vm_flush */
+   24 + /* gfx_v9_0_ring_emit_vm_flush */
	8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
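
The smaller emit_frame_size reservations fall straight out of dropping one
hub from the sequence. As a back-of-envelope check, assuming the usual gfx9
PM4 packet sizes (5 dwords per WRITE_DATA, 7 per WAIT_REG_MEM, 2 for the
trailing PFP/ME sync on the gfx ring; these sizes are assumed here, not
stated in the patch), the dword budget reproduces both numbers:

enum {
	WRITE_DATA_DW   = 5,	/* header + 4 payload dwords */
	WAIT_REG_MEM_DW = 7,	/* header + 6 payload dwords */
	PFP_SYNC_ME_DW  = 2,	/* gfx-only tail of the flush */

	PER_HUB_DW = 3 * WRITE_DATA_DW + WAIT_REG_MEM_DW,	/* 22 */

	OLD_GFX_VM_FLUSH_DW = 2 * PER_HUB_DW + PFP_SYNC_ME_DW,	/* 46 */
	NEW_GFX_VM_FLUSH_DW = 1 * PER_HUB_DW + PFP_SYNC_ME_DW,	/* 24 */
};

_Static_assert(OLD_GFX_VM_FLUSH_DW == 46, "matches the old reservation");
_Static_assert(NEW_GFX_VM_FLUSH_DW == 24, "matches the new reservation");

The compute ring has no PFP sync, so its new 24 leaves a couple of dwords of
slack; the old 64 there appears to have simply been a generous overestimate.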