Re: [PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue

2018-05-11 Thread Oded Gabbay
Applied this patch instead of the original, thanks.

On Wed, Apr 25, 2018 at 12:42 AM, Felix Kuehling  wrote:
> A minor update to this patch is attached. The rest of the series is
> unchanged and rebased cleanly on 4.17-rc2 on my system.
>
> Regards,
>   Felix
>
>
> On 2018-04-10 05:33 PM, Felix Kuehling wrote:
>> Signed-off-by: Felix Kuehling 
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 
>> +--
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |  7 ++-
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c |  9 
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  9 
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |  9 
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
>>  7 files changed, 63 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> index 36c9269e..5d7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
>> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value)
>>   }
>>  }
>>
>> +void write_kernel_doorbell64(void __iomem *db, u64 value)
>> +{
>> + if (db) {
>> + WARN(((unsigned long)db & 7) != 0,
>> +  "Unaligned 64-bit doorbell");
>> + writeq(value, (u64 __iomem *)db);
>> + pr_debug("writing %llu to doorbell address 0x%p\n", value, db);
>> + }
>> +}
>> +
>>  unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>>   struct kfd_process *process,
>>   unsigned int doorbell_id)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> index 9f38161..476951d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct 
>> kfd_dev *dev,
>>   kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
>>   kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
>>
>> - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
>> + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
>>   &kq->wptr_mem);
>>
>>   if (retval != 0)
>> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>>   size_t available_size;
>>   size_t queue_size_dwords;
>>   uint32_t wptr, rptr;
>> + uint64_t wptr64;
>>   unsigned int *queue_address;
>>
>>   /* When rptr == wptr, the buffer is empty.
>> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>>* the opposite. So we can only use up to queue_size_dwords - 1 dwords.
>>*/
>>   rptr = *kq->rptr_kernel;
>> - wptr = *kq->wptr_kernel;
>> + wptr = kq->pending_wptr;
>> + wptr64 = kq->pending_wptr64;
>>   queue_address = (unsigned int *)kq->pq_kernel_addr;
>>   queue_size_dwords = kq->queue->properties.queue_size / 4;
>>
>> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue 
>> *kq,
>>   while (wptr > 0) {
>>   queue_address[wptr] = kq->nop_packet;
>>   wptr = (wptr + 1) % queue_size_dwords;
>> + wptr64++;
>>   }
>>   }
>>
>>   *buffer_ptr = &queue_address[wptr];
>>   kq->pending_wptr = wptr + packet_size_in_dwords;
>> + kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
>>
>>   return 0;
>>
>> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
>>   pr_debug("\n");
>>  #endif
>>
>> - *kq->wptr_kernel = kq->pending_wptr;
>> - write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
>> - kq->pending_wptr);
>> + kq->ops_asic_specific.submit_packet(kq);
>>  }
>>
>>  static void rollback_packet(struct kernel_queue *kq)
>>  {
>> - kq->pending_wptr = *kq->wptr_kernel;
>> + if (kq->dev->device_info->doorbell_size == 8) {
>> + kq->pending_wptr64 = *kq->wptr64_kernel;
>> + kq->pending_wptr = *kq->wptr_kernel %
>> + (kq->queue->properties.queue_size / 4);
>> + } else {
>> + kq->pending_wptr = *kq->wptr_kernel;
>> + }
>>  }
>>
>>  struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
>> *dev,
>>   case CHIP_HAWAII:
>>   kernel_queue_init_cik(&kq->ops_asic_specific);
>>   break;
>> +
>> + case CHIP_VEGA10:
>> + case CHIP_RAVEN:
>> + kernel_queue_init_v9(&kq->ops_asic_specific);
>> + break;
>>   default:
>>   WARN(1, "Unexpected 

Re: [PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue

2018-04-24 Thread Felix Kuehling
A minor update to this patch is attached. The rest of the series is
unchanged and rebased cleanly on 4.17-rc2 on my system.

Regards,
  Felix


On 2018-04-10 05:33 PM, Felix Kuehling wrote:
> Signed-off-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 +
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 
> +--
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |  7 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c |  9 
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  9 
>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |  9 
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
>  7 files changed, 63 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> index 36c9269e..5d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value)
>   }
>  }
>  
> +void write_kernel_doorbell64(void __iomem *db, u64 value)
> +{
> + if (db) {
> + WARN(((unsigned long)db & 7) != 0,
> +  "Unaligned 64-bit doorbell");
> + writeq(value, (u64 __iomem *)db);
> + pr_debug("writing %llu to doorbell address 0x%p\n", value, db);
> + }
> +}
> +
>  unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
>   struct kfd_process *process,
>   unsigned int doorbell_id)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 9f38161..476951d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct 
> kfd_dev *dev,
>   kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
>   kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
>  
> - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
> + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
>   &kq->wptr_mem);
>  
>   if (retval != 0)
> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>   size_t available_size;
>   size_t queue_size_dwords;
>   uint32_t wptr, rptr;
> + uint64_t wptr64;
>   unsigned int *queue_address;
>  
>   /* When rptr == wptr, the buffer is empty.
> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
>* the opposite. So we can only use up to queue_size_dwords - 1 dwords.
>*/
>   rptr = *kq->rptr_kernel;
> - wptr = *kq->wptr_kernel;
> + wptr = kq->pending_wptr;
> + wptr64 = kq->pending_wptr64;
>   queue_address = (unsigned int *)kq->pq_kernel_addr;
>   queue_size_dwords = kq->queue->properties.queue_size / 4;
>  
> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue 
> *kq,
>   while (wptr > 0) {
>   queue_address[wptr] = kq->nop_packet;
>   wptr = (wptr + 1) % queue_size_dwords;
> + wptr64++;
>   }
>   }
>  
>   *buffer_ptr = &queue_address[wptr];
>   kq->pending_wptr = wptr + packet_size_in_dwords;
> + kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
>  
>   return 0;
>  
> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
>   pr_debug("\n");
>  #endif
>  
> - *kq->wptr_kernel = kq->pending_wptr;
> - write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
> - kq->pending_wptr);
> + kq->ops_asic_specific.submit_packet(kq);
>  }
>  
>  static void rollback_packet(struct kernel_queue *kq)
>  {
> - kq->pending_wptr = *kq->wptr_kernel;
> + if (kq->dev->device_info->doorbell_size == 8) {
> + kq->pending_wptr64 = *kq->wptr64_kernel;
> + kq->pending_wptr = *kq->wptr_kernel %
> + (kq->queue->properties.queue_size / 4);
> + } else {
> + kq->pending_wptr = *kq->wptr_kernel;
> + }
>  }
>  
>  struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
> *dev,
>   case CHIP_HAWAII:
>   kernel_queue_init_cik(&kq->ops_asic_specific);
>   break;
> +
> + case CHIP_VEGA10:
> + case CHIP_RAVEN:
> + kernel_queue_init_v9(&kq->ops_asic_specific);
> + break;
>   default:
>   WARN(1, "Unexpected ASIC family %u",
>dev->device_info->asic_family);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> index 5940531..97aff20 100644
> --- 

[PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue

2018-04-10 Thread Felix Kuehling
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 +
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 +--
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |  7 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c |  9 
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  9 
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |  9 
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
 7 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 36c9269e..5d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value)
}
 }
 
+void write_kernel_doorbell64(void __iomem *db, u64 value)
+{
+   if (db) {
+   WARN(((unsigned long)db & 7) != 0,
+"Unaligned 64-bit doorbell");
+   writeq(value, (u64 __iomem *)db);
+   pr_debug("writing %llu to doorbell address 0x%p\n", value, db);
+   }
+}
+
 unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 9f38161..476951d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct 
kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
 
-   retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+   retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
&kq->wptr_mem);
 
if (retval != 0)
@@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
size_t available_size;
size_t queue_size_dwords;
uint32_t wptr, rptr;
+   uint64_t wptr64;
unsigned int *queue_address;
 
/* When rptr == wptr, the buffer is empty.
@@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
 */
rptr = *kq->rptr_kernel;
-   wptr = *kq->wptr_kernel;
+   wptr = kq->pending_wptr;
+   wptr64 = kq->pending_wptr64;
queue_address = (unsigned int *)kq->pq_kernel_addr;
queue_size_dwords = kq->queue->properties.queue_size / 4;
 
@@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
while (wptr > 0) {
queue_address[wptr] = kq->nop_packet;
wptr = (wptr + 1) % queue_size_dwords;
+   wptr64++;
}
}
 
*buffer_ptr = &queue_address[wptr];
kq->pending_wptr = wptr + packet_size_in_dwords;
+   kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
 
return 0;
 
@@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
pr_debug("\n");
 #endif
 
-   *kq->wptr_kernel = kq->pending_wptr;
-   write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
-   kq->pending_wptr);
+   kq->ops_asic_specific.submit_packet(kq);
 }
 
 static void rollback_packet(struct kernel_queue *kq)
 {
-   kq->pending_wptr = *kq->wptr_kernel;
+   if (kq->dev->device_info->doorbell_size == 8) {
+   kq->pending_wptr64 = *kq->wptr64_kernel;
+   kq->pending_wptr = *kq->wptr_kernel %
+   (kq->queue->properties.queue_size / 4);
+   } else {
+   kq->pending_wptr = *kq->wptr_kernel;
+   }
 }
 
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_HAWAII:
kernel_queue_init_cik(&kq->ops_asic_specific);
break;
+
+   case CHIP_VEGA10:
+   case CHIP_RAVEN:
+   kernel_queue_init_v9(&kq->ops_asic_specific);
+   break;
default:
WARN(1, "Unexpected ASIC family %u",
 dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 5940531..97aff20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -72,6 +72,7 @@ struct kernel_queue {
struct kfd_dev  *dev;
struct mqd_manager  *mqd;
struct queue*queue;
+   uint64_tpending_wptr64;
uint32_t