Re: [PATCH 04/21] drm/amdgpu: Add GFXv9 kfd2kgd interface functions
On Wed, May 16, 2018 at 1:08 AM, Felix Kuehlingwrote: > On 2018-05-15 05:41 AM, Dave Airlie wrote: >>> +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, >>> + uint32_t queue_id, uint32_t __user *wptr, >>> + uint32_t wptr_shift, uint32_t wptr_mask, >>> + struct mm_struct *mm) >>> +{ >>> + struct amdgpu_device *adev = get_amdgpu_device(kgd); >>> + struct v9_mqd *m; >>> + uint32_t *mqd_hqd; >>> + uint32_t reg, hqd_base, data; >>> + >>> + m = get_mqd(mqd); >>> + >>> + acquire_queue(kgd, pipe_id, queue_id); >>> + >>> + /* HIQ is set during driver init period with vmid set to 0*/ >>> + if (m->cp_hqd_vmid == 0) { >>> + uint32_t value, mec, pipe; >>> + >>> + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; >>> + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); >>> + >>> + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", >>> + mec, pipe, queue_id); >>> + value = RREG32(SOC15_REG_OFFSET(GC, 0, >>> mmRLC_CP_SCHEDULERS)); >>> + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, >>> + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); >>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); >>> + } >>> + >>> + /* HQD registers extend from CP_MQD_BASE_ADDR to >>> CP_HQD_EOP_WPTR_MEM. */ >>> + mqd_hqd = >cp_mqd_base_addr_lo; >>> + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); >>> + >>> + for (reg = hqd_base; >>> +reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) >>> + WREG32(reg, mqd_hqd[reg - hqd_base]); >>> + >>> + >>> + /* Activate doorbell logic before triggering WPTR poll. */ >>> + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, >>> +CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); >>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); >>> + >>> + if (wptr) { >>> + /* Don't read wptr with get_user because the user >>> +* context may not be accessible (if this function >>> +* runs in a work queue). Instead trigger a one-shot >>> +* polling read from memory in the CP. This assumes >>> +* that wptr is GPU-accessible in the queue's VMID via >>> +* ATC or SVM. WPTR==RPTR before starting the poll so >>> +* the CP starts fetching new commands from the right >>> +* place. >>> +* >>> +* Guessing a 64-bit WPTR from a 32-bit RPTR is a bit >>> +* tricky. Assume that the queue didn't overflow. The >>> +* number of valid bits in the 32-bit RPTR depends on >>> +* the queue size. The remaining bits are taken from >>> +* the saved 64-bit WPTR. If the WPTR wrapped, add the >>> +* queue size. >>> +*/ >>> + uint32_t queue_size = >>> + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, >>> + CP_HQD_PQ_CONTROL, QUEUE_SIZE); >>> + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - >>> 1); >>> + >>> + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < >>> guessed_wptr) >>> + guessed_wptr += queue_size; >>> + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); >>> + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; >>> + >>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), >>> + lower_32_bits(guessed_wptr)); >>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), >>> + upper_32_bits(guessed_wptr)); >>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), >>> + lower_32_bits((uint64_t)wptr)); >>> + WREG32(SOC15_REG_OFFSET(GC, 0, >>> mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), >>> + upper_32_bits((uint64_t)wptr)); >> CC [M] drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.o >> In file included from >> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:30:0: >> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c: >> In function ‘kgd_hqd_load’: >> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:473:24: >> warning: cast from pointer to integer of different size >> [-Wpointer-to-int-cast] >> lower_32_bits((uint64_t)wptr)); >> ^ >> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu.h:1666:53: >> note: in definition of macro ‘WREG32’ >> #define WREG32(reg, v)
Re: [PATCH 04/21] drm/amdgpu: Add GFXv9 kfd2kgd interface functions
On 2018-05-15 05:41 AM, Dave Airlie wrote: >> +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, >> + uint32_t queue_id, uint32_t __user *wptr, >> + uint32_t wptr_shift, uint32_t wptr_mask, >> + struct mm_struct *mm) >> +{ >> + struct amdgpu_device *adev = get_amdgpu_device(kgd); >> + struct v9_mqd *m; >> + uint32_t *mqd_hqd; >> + uint32_t reg, hqd_base, data; >> + >> + m = get_mqd(mqd); >> + >> + acquire_queue(kgd, pipe_id, queue_id); >> + >> + /* HIQ is set during driver init period with vmid set to 0*/ >> + if (m->cp_hqd_vmid == 0) { >> + uint32_t value, mec, pipe; >> + >> + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; >> + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); >> + >> + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", >> + mec, pipe, queue_id); >> + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); >> + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, >> + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); >> + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); >> + } >> + >> + /* HQD registers extend from CP_MQD_BASE_ADDR to >> CP_HQD_EOP_WPTR_MEM. */ >> + mqd_hqd = >cp_mqd_base_addr_lo; >> + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); >> + >> + for (reg = hqd_base; >> +reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) >> + WREG32(reg, mqd_hqd[reg - hqd_base]); >> + >> + >> + /* Activate doorbell logic before triggering WPTR poll. */ >> + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, >> +CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); >> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); >> + >> + if (wptr) { >> + /* Don't read wptr with get_user because the user >> +* context may not be accessible (if this function >> +* runs in a work queue). Instead trigger a one-shot >> +* polling read from memory in the CP. This assumes >> +* that wptr is GPU-accessible in the queue's VMID via >> +* ATC or SVM. WPTR==RPTR before starting the poll so >> +* the CP starts fetching new commands from the right >> +* place. >> +* >> +* Guessing a 64-bit WPTR from a 32-bit RPTR is a bit >> +* tricky. Assume that the queue didn't overflow. The >> +* number of valid bits in the 32-bit RPTR depends on >> +* the queue size. The remaining bits are taken from >> +* the saved 64-bit WPTR. If the WPTR wrapped, add the >> +* queue size. >> +*/ >> + uint32_t queue_size = >> + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, >> + CP_HQD_PQ_CONTROL, QUEUE_SIZE); >> + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); >> + >> + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) >> + guessed_wptr += queue_size; >> + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); >> + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; >> + >> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), >> + lower_32_bits(guessed_wptr)); >> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), >> + upper_32_bits(guessed_wptr)); >> + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), >> + lower_32_bits((uint64_t)wptr)); >> + WREG32(SOC15_REG_OFFSET(GC, 0, >> mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), >> + upper_32_bits((uint64_t)wptr)); > CC [M] drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.o > In file included from > /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:30:0: > /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c: > In function ‘kgd_hqd_load’: > /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:473:24: > warning: cast from pointer to integer of different size > [-Wpointer-to-int-cast] > lower_32_bits((uint64_t)wptr)); > ^ > /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu.h:1666:53: > note: in definition of macro ‘WREG32’ > #define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) > ^ > /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:473:10: > note:
Re: [PATCH 04/21] drm/amdgpu: Add GFXv9 kfd2kgd interface functions
> +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm) > +{ > + struct amdgpu_device *adev = get_amdgpu_device(kgd); > + struct v9_mqd *m; > + uint32_t *mqd_hqd; > + uint32_t reg, hqd_base, data; > + > + m = get_mqd(mqd); > + > + acquire_queue(kgd, pipe_id, queue_id); > + > + /* HIQ is set during driver init period with vmid set to 0*/ > + if (m->cp_hqd_vmid == 0) { > + uint32_t value, mec, pipe; > + > + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; > + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); > + > + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", > + mec, pipe, queue_id); > + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); > + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, > + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); > + } > + > + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. > */ > + mqd_hqd = >cp_mqd_base_addr_lo; > + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); > + > + for (reg = hqd_base; > +reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) > + WREG32(reg, mqd_hqd[reg - hqd_base]); > + > + > + /* Activate doorbell logic before triggering WPTR poll. */ > + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, > +CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); > + > + if (wptr) { > + /* Don't read wptr with get_user because the user > +* context may not be accessible (if this function > +* runs in a work queue). Instead trigger a one-shot > +* polling read from memory in the CP. This assumes > +* that wptr is GPU-accessible in the queue's VMID via > +* ATC or SVM. WPTR==RPTR before starting the poll so > +* the CP starts fetching new commands from the right > +* place. > +* > +* Guessing a 64-bit WPTR from a 32-bit RPTR is a bit > +* tricky. Assume that the queue didn't overflow. The > +* number of valid bits in the 32-bit RPTR depends on > +* the queue size. The remaining bits are taken from > +* the saved 64-bit WPTR. If the WPTR wrapped, add the > +* queue size. > +*/ > + uint32_t queue_size = > + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, > + CP_HQD_PQ_CONTROL, QUEUE_SIZE); > + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); > + > + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) > + guessed_wptr += queue_size; > + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); > + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; > + > + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), > + lower_32_bits(guessed_wptr)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), > + upper_32_bits(guessed_wptr)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), > + lower_32_bits((uint64_t)wptr)); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), > + upper_32_bits((uint64_t)wptr)); CC [M] drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.o In file included from /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:30:0: /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c: In function ‘kgd_hqd_load’: /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:473:24: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] lower_32_bits((uint64_t)wptr)); ^ /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu.h:1666:53: note: in definition of macro ‘WREG32’ #define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) ^ /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:473:10: note: in expansion of macro ‘lower_32_bits’ lower_32_bits((uint64_t)wptr)); ^