Re: [PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue
Applied this patch instead of the original, thanks. On Wed, Apr 25, 2018 at 12:42 AM, Felix Kuehling wrote: > A minor update to this patch is attached. The rest of the series is > unchanged and rebased cleanly on 4.17-rc2 on my system. > > Regards, > Felix > > > On 2018-04-10 05:33 PM, Felix Kuehling wrote: >> Signed-off-by: Felix Kuehling >> --- >> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 + >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 >> +-- >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7 ++- >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 9 >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 9 >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 9 >> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + >> 7 files changed, 63 insertions(+), 7 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c >> index 36c9269e..5d7 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c >> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value) >> } >> } >> >> +void write_kernel_doorbell64(void __iomem *db, u64 value) >> +{ >> + if (db) { >> + WARN(((unsigned long)db & 7) != 0, >> + "Unaligned 64-bit doorbell"); >> + writeq(value, (u64 __iomem *)db); >> + pr_debug("writing %llu to doorbell address 0x%p\n", value, >> db); >> + } >> +} >> + >> unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, >> struct kfd_process *process, >> unsigned int doorbell_id) >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> index 9f38161..476951d 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct >> kfd_dev *dev, >> kq->rptr_kernel = kq->rptr_mem->cpu_ptr; >> kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; >> >> - retval = 
kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), >> + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, >> &kq->wptr_mem); >> >> if (retval != 0) >> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, >> size_t available_size; >> size_t queue_size_dwords; >> uint32_t wptr, rptr; >> + uint64_t wptr64; >> unsigned int *queue_address; >> >> /* When rptr == wptr, the buffer is empty. >> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, >>* the opposite. So we can only use up to queue_size_dwords - 1 dwords. >>*/ >> rptr = *kq->rptr_kernel; >> - wptr = *kq->wptr_kernel; >> + wptr = kq->pending_wptr; >> + wptr64 = kq->pending_wptr64; >> queue_address = (unsigned int *)kq->pq_kernel_addr; >> queue_size_dwords = kq->queue->properties.queue_size / 4; >> >> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue >> *kq, >> while (wptr > 0) { >> queue_address[wptr] = kq->nop_packet; >> wptr = (wptr + 1) % queue_size_dwords; >> + wptr64++; >> } >> } >> >> *buffer_ptr = &queue_address[wptr]; >> kq->pending_wptr = wptr + packet_size_in_dwords; >> + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; >> >> return 0; >> >> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) >> pr_debug("\n"); >> #endif >> >> - *kq->wptr_kernel = kq->pending_wptr; >> - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, >> - kq->pending_wptr); >> + kq->ops_asic_specific.submit_packet(kq); >> } >> >> static void rollback_packet(struct kernel_queue *kq) >> { >> - kq->pending_wptr = *kq->wptr_kernel; >> + if (kq->dev->device_info->doorbell_size == 8) { >> + kq->pending_wptr64 = *kq->wptr64_kernel; >> + kq->pending_wptr = *kq->wptr_kernel % >> + (kq->queue->properties.queue_size / 4); >> + } else { >> + kq->pending_wptr = *kq->wptr_kernel; >> + } >> } >> >> struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, >> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct 
kfd_dev >> *dev, >> case CHIP_HAWAII: >> kernel_queue_init_cik(&kq->ops_asic_specific); >> break; >> + >> + case CHIP_VEGA10: >> + case CHIP_RAVEN: >> + kernel_queue_init_v9(&kq->ops_asic_specific); >> + break; >> default: >> WARN(1, "Unexpected ASIC family %u", >>
Re: [PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue
A minor update to this patch is attached. The rest of the series is unchanged and rebased cleanly on 4.17-rc2 on my system. Regards, Felix On 2018-04-10 05:33 PM, Felix Kuehling wrote: > Signed-off-by: Felix Kuehling > --- > drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 + > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 > +-- > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7 ++- > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 9 > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 9 > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 9 > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + > 7 files changed, 63 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > index 36c9269e..5d7 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value) > } > } > > +void write_kernel_doorbell64(void __iomem *db, u64 value) > +{ > + if (db) { > + WARN(((unsigned long)db & 7) != 0, > + "Unaligned 64-bit doorbell"); > + writeq(value, (u64 __iomem *)db); > + pr_debug("writing %llu to doorbell address 0x%p\n", value, > db); > + } > +} > + > unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, > struct kfd_process *process, > unsigned int doorbell_id) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > index 9f38161..476951d 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct > kfd_dev *dev, > kq->rptr_kernel = kq->rptr_mem->cpu_ptr; > kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; > > - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), > + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, > &kq->wptr_mem); > > if (retval != 0) > @@ -208,6 +208,7 @@ 
static int acquire_packet_buffer(struct kernel_queue *kq, > size_t available_size; > size_t queue_size_dwords; > uint32_t wptr, rptr; > + uint64_t wptr64; > unsigned int *queue_address; > > /* When rptr == wptr, the buffer is empty. > @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, >* the opposite. So we can only use up to queue_size_dwords - 1 dwords. >*/ > rptr = *kq->rptr_kernel; > - wptr = *kq->wptr_kernel; > + wptr = kq->pending_wptr; > + wptr64 = kq->pending_wptr64; > queue_address = (unsigned int *)kq->pq_kernel_addr; > queue_size_dwords = kq->queue->properties.queue_size / 4; > > @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue > *kq, > while (wptr > 0) { > queue_address[wptr] = kq->nop_packet; > wptr = (wptr + 1) % queue_size_dwords; > + wptr64++; > } > } > > *buffer_ptr = &queue_address[wptr]; > kq->pending_wptr = wptr + packet_size_in_dwords; > + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; > > return 0; > > @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) > pr_debug("\n"); > #endif > > - *kq->wptr_kernel = kq->pending_wptr; > - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, > - kq->pending_wptr); > + kq->ops_asic_specific.submit_packet(kq); > } > > static void rollback_packet(struct kernel_queue *kq) > { > - kq->pending_wptr = *kq->wptr_kernel; > + if (kq->dev->device_info->doorbell_size == 8) { > + kq->pending_wptr64 = *kq->wptr64_kernel; > + kq->pending_wptr = *kq->wptr_kernel % > + (kq->queue->properties.queue_size / 4); > + } else { > + kq->pending_wptr = *kq->wptr_kernel; > + } > } > > struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, > @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev > *dev, > case CHIP_HAWAII: > kernel_queue_init_cik(&kq->ops_asic_specific); > break; > + > + case CHIP_VEGA10: > + case CHIP_RAVEN: > + kernel_queue_init_v9(&kq->ops_asic_specific); > + break; > default: > WARN(1, "Unexpected ASIC 
family %u", >dev->device_info->asic_family); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h > b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h > index 5940531..97aff20 100644 > --- a/drivers/gpu/drm/amd/amdk
[PATCH 16/21] drm/amdkfd: Add 64-bit doorbell and wptr support to kernel queue
Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 +-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7 ++- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 9 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 9 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 9 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 7 files changed, 63 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 36c9269e..5d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value) } } +void write_kernel_doorbell64(void __iomem *db, u64 value) +{ + if (db) { + WARN(((unsigned long)db & 7) != 0, +"Unaligned 64-bit doorbell"); + writeq(value, (u64 __iomem *)db); + pr_debug("writing %llu to doorbell address 0x%p\n", value, db); + } +} + unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, unsigned int doorbell_id) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 9f38161..476951d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, &kq->wptr_mem); if (retval != 0) @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, size_t available_size; size_t queue_size_dwords; uint32_t wptr, rptr; + uint64_t wptr64; unsigned int *queue_address; /* When rptr == wptr, the buffer is empty. 
@@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * the opposite. So we can only use up to queue_size_dwords - 1 dwords. */ rptr = *kq->rptr_kernel; - wptr = *kq->wptr_kernel; + wptr = kq->pending_wptr; + wptr64 = kq->pending_wptr64; queue_address = (unsigned int *)kq->pq_kernel_addr; queue_size_dwords = kq->queue->properties.queue_size / 4; @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue *kq, while (wptr > 0) { queue_address[wptr] = kq->nop_packet; wptr = (wptr + 1) % queue_size_dwords; + wptr64++; } } *buffer_ptr = &queue_address[wptr]; kq->pending_wptr = wptr + packet_size_in_dwords; + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; return 0; @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) pr_debug("\n"); #endif - *kq->wptr_kernel = kq->pending_wptr; - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, - kq->pending_wptr); + kq->ops_asic_specific.submit_packet(kq); } static void rollback_packet(struct kernel_queue *kq) { - kq->pending_wptr = *kq->wptr_kernel; + if (kq->dev->device_info->doorbell_size == 8) { + kq->pending_wptr64 = *kq->wptr64_kernel; + kq->pending_wptr = *kq->wptr_kernel % + (kq->queue->properties.queue_size / 4); + } else { + kq->pending_wptr = *kq->wptr_kernel; + } } struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + kernel_queue_init_v9(&kq->ops_asic_specific); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 5940531..97aff20 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -72,6 +72,7 @@ struct kernel_queue { struct kfd_dev *dev; struct 
mqd_manager *mqd; struct queue*queue; + uint64_tpending_wptr64; uint32_tpendi