Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-07 Thread Shashank Sharma



On 07/02/2023 15:20, Alex Deucher wrote:

On Tue, Feb 7, 2023 at 9:19 AM Christian König  wrote:

Am 07.02.23 um 15:17 schrieb Alex Deucher:

On Tue, Feb 7, 2023 at 9:11 AM Christian König
 wrote:

Am 07.02.23 um 15:07 schrieb Alex Deucher:

On Tue, Feb 7, 2023 at 2:38 AM Shashank Sharma  wrote:

On 07/02/2023 08:03, Christian König wrote:

Am 06.02.23 um 22:03 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:

Am 06.02.23 um 17:56 schrieb Alex Deucher:

On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma
 wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma
 wrote:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
   include/uapi/drm/amdgpu_drm.h | 53
+++
   1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
   #define DRM_AMDGPU_VM  0x13
   #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
   #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

   #define DRM_IOCTL_AMDGPU_GEM_CREATE
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
drm_amdgpu_gem_create)
   #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
+ DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE +
DRM_AMDGPU_VM, union drm_amdgpu_vm)
   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union
drm_amdgpu_fence_to_handle)
   #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE +
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE +
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

   /**
* DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
  union drm_amdgpu_ctx_out out;
   };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.

Can you please help to cross check this information ? All the
existing
kernel doorbell calculations are keeping doorbells size as
sizeof(u32)

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

I think we can stick with using __u32 because this is *not* the size of
the doorbell entries.

Instead this is the offset into the BO where to find the doorbell for
this queue (which then in turn is 64bits wide).

Since we will probably never have more than 4GiB doorbells we should be
pretty safe to use 32bits here.

Yes, the offset would still be 32 bits, but the units would be
qwords.  E.g.,

+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.

Ah, so you only wanted to fix the comment. That was absolutely not
clear from the discussion.

If I understand this correctly, the offset of the doorbell in the BO is
still 32-bit, but its width (size in bytes) is 64 bits. Am I getting
that right ?

Right.  Each doorbell is 64 bits (8 bytes) so this value would
basically be an index into the doorbell bo.  Having it be a 64 bit
index rather than a 32 bit index would avoid the possibility of users
specifying overlapping doorbells.  E.g.,
offset in bytes
0 - doorbell
4 - doorbell
Would be incorrect, while
offset in bytes
0 - doorbell
8 - doorbell
Would be correct.

I.e., u64 doorbell_page[512] vs u32 doorbell_page[1024]

Well I usually prefer just straight byte offsets, but I think the main
question 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-07 Thread Alex Deucher
On Tue, Feb 7, 2023 at 9:19 AM Christian König  wrote:
>
> Am 07.02.23 um 15:17 schrieb Alex Deucher:
> > On Tue, Feb 7, 2023 at 9:11 AM Christian König
> >  wrote:
> >> Am 07.02.23 um 15:07 schrieb Alex Deucher:
> >>> On Tue, Feb 7, 2023 at 2:38 AM Shashank Sharma  
> >>> wrote:
>  On 07/02/2023 08:03, Christian König wrote:
> > Am 06.02.23 um 22:03 schrieb Alex Deucher:
> >> On Mon, Feb 6, 2023 at 12:01 PM Christian König
> >>  wrote:
> >>> Am 06.02.23 um 17:56 schrieb Alex Deucher:
>  On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma
>   wrote:
> > Hey Alex,
> >
> > On 03/02/2023 23:07, Alex Deucher wrote:
> >> On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma
> >>  wrote:
> >>> From: Alex Deucher 
> >>>
> >>> This patch intorduces new UAPI/IOCTL for usermode graphics
> >>> queue. The userspace app will fill this structure and request
> >>> the graphics driver to add a graphics work queue for it. The
> >>> output of this UAPI is a queue id.
> >>>
> >>> This UAPI maps the queue into GPU, so the graphics app can start
> >>> submitting work to the queue as soon as the call returns.
> >>>
> >>> Cc: Alex Deucher 
> >>> Cc: Christian Koenig 
> >>> Signed-off-by: Alex Deucher 
> >>> Signed-off-by: Shashank Sharma 
> >>> ---
> >>>   include/uapi/drm/amdgpu_drm.h | 53
> >>> +++
> >>>   1 file changed, 53 insertions(+)
> >>>
> >>> diff --git a/include/uapi/drm/amdgpu_drm.h
> >>> b/include/uapi/drm/amdgpu_drm.h
> >>> index 4038abe8505a..6c5235d107b3 100644
> >>> --- a/include/uapi/drm/amdgpu_drm.h
> >>> +++ b/include/uapi/drm/amdgpu_drm.h
> >>> @@ -54,6 +54,7 @@ extern "C" {
> >>>   #define DRM_AMDGPU_VM  0x13
> >>>   #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> >>>   #define DRM_AMDGPU_SCHED   0x15
> >>> +#define DRM_AMDGPU_USERQ   0x16
> >>>
> >>>   #define DRM_IOCTL_AMDGPU_GEM_CREATE
> >>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
> >>> drm_amdgpu_gem_create)
> >>>   #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
> >>> + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >>> @@ -71,6 +72,7 @@ extern "C" {
> >>>   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE +
> >>> DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >>>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE
> >>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union
> >>> drm_amdgpu_fence_to_handle)
> >>>   #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE +
> >>> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >>> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE +
> >>> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >>>
> >>>   /**
> >>>* DOC: memory domains
> >>> @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
> >>>  union drm_amdgpu_ctx_out out;
> >>>   };
> >>>
> >>> +/* user queue IOCTL */
> >>> +#define AMDGPU_USERQ_OP_CREATE 1
> >>> +#define AMDGPU_USERQ_OP_FREE   2
> >>> +
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> >>> +
> >>> +struct drm_amdgpu_userq_mqd {
> >>> +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> >>> +   __u32   flags;
> >>> +   /** IP type: AMDGPU_HW_IP_* */
> >>> +   __u32   ip_type;
> >>> +   /** GEM object handle */
> >>> +   __u32   doorbell_handle;
> >>> +   /** Doorbell offset in dwords */
> >>> +   __u32   doorbell_offset;
> >> Since doorbells are 64 bit, maybe this offset should be in qwords.
> > Can you please help to cross check this information ? All the
> > existing
> > kernel doorbell calculations are keeping doorbells size as
> > sizeof(u32)
>  Doorbells on pre-vega hardware are 32 bits so that is where that 
>  comes
>  from, but from vega onward most doorbells are 64 bit.  I think some
>  versions of VCN may still use 32 bit doorbells.  Internally in the
>  kernel driver we just use two slots for newer hardware, but for the
>  UAPI, I think we can just stick with 64 bit slots to avoid confusion.
>  Even if an engine only uses a 32 bit one, I don't know that there is
>  much value to trying to support variable doorbell sizes.
> >>> I think we can stick with using __u32 because this is *not* the size 
> >>> of
> >>> the doorbell 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-07 Thread Christian König

Am 07.02.23 um 15:17 schrieb Alex Deucher:

On Tue, Feb 7, 2023 at 9:11 AM Christian König
 wrote:

Am 07.02.23 um 15:07 schrieb Alex Deucher:

On Tue, Feb 7, 2023 at 2:38 AM Shashank Sharma  wrote:

On 07/02/2023 08:03, Christian König wrote:

Am 06.02.23 um 22:03 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:

Am 06.02.23 um 17:56 schrieb Alex Deucher:

On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma
 wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma
 wrote:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 53
+++
  1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
  #define DRM_AMDGPU_VM  0x13
  #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
  #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

  #define DRM_IOCTL_AMDGPU_GEM_CREATE
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
drm_amdgpu_gem_create)
  #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
+ DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
  #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE +
DRM_AMDGPU_VM, union drm_amdgpu_vm)
  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union
drm_amdgpu_fence_to_handle)
  #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE +
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE +
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

  /**
   * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
 union drm_amdgpu_ctx_out out;
  };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.

Can you please help to cross check this information ? All the
existing
kernel doorbell calculations are keeping doorbells size as
sizeof(u32)

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

I think we can stick with using __u32 because this is *not* the size of
the doorbell entries.

Instead this is the offset into the BO where to find the doorbell for
this queue (which then in turn is 64bits wide).

Since we will probably never have more than 4GiB doorbells we should be
pretty safe to use 32bits here.

Yes, the offset would still be 32 bits, but the units would be
qwords.  E.g.,

+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.

Ah, so you only wanted to fix the comment. That was absolutely not
clear from the discussion.

If I understand this correctly, the offset of the doorbell in the BO is
still 32-bit, but its width (size in bytes) is 64 bits. Am I getting
that right ?

Right.  Each doorbell is 64 bits (8 bytes) so this value would
basically be an index into the doorbell bo.  Having it be a 64 bit
index rather than a 32 bit index would avoid the possibility of users
specifying overlapping doorbells.  E.g.,
offset in bytes
0 - doorbell
4 - doorbell
Would be incorrect, while
offset in bytes
0 - doorbell
8 - doorbell
Would be correct.

I.e., u64 doorbell_page[512] vs u32 doorbell_page[1024]

Well I usually prefer just straight byte offsets, but I think the main
question is what does the underlying hw/fw use?

If that's a dword index we should probably stick with that in the UAPI
as 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-07 Thread Alex Deucher
On Tue, Feb 7, 2023 at 9:11 AM Christian König
 wrote:
>
> Am 07.02.23 um 15:07 schrieb Alex Deucher:
> > On Tue, Feb 7, 2023 at 2:38 AM Shashank Sharma  
> > wrote:
> >>
> >> On 07/02/2023 08:03, Christian König wrote:
> >>> Am 06.02.23 um 22:03 schrieb Alex Deucher:
>  On Mon, Feb 6, 2023 at 12:01 PM Christian König
>   wrote:
> > Am 06.02.23 um 17:56 schrieb Alex Deucher:
> >> On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma
> >>  wrote:
> >>> Hey Alex,
> >>>
> >>> On 03/02/2023 23:07, Alex Deucher wrote:
>  On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma
>   wrote:
> > From: Alex Deucher 
> >
> > This patch intorduces new UAPI/IOCTL for usermode graphics
> > queue. The userspace app will fill this structure and request
> > the graphics driver to add a graphics work queue for it. The
> > output of this UAPI is a queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher 
> > Cc: Christian Koenig 
> > Signed-off-by: Alex Deucher 
> > Signed-off-by: Shashank Sharma 
> > ---
> >  include/uapi/drm/amdgpu_drm.h | 53
> > +++
> >  1 file changed, 53 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> > b/include/uapi/drm/amdgpu_drm.h
> > index 4038abe8505a..6c5235d107b3 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >  #define DRM_AMDGPU_VM  0x13
> >  #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> >  #define DRM_AMDGPU_SCHED   0x15
> > +#define DRM_AMDGPU_USERQ   0x16
> >
> >  #define DRM_IOCTL_AMDGPU_GEM_CREATE
> > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
> > drm_amdgpu_gem_create)
> >  #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
> > + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >  #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE +
> > DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE
> > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union
> > drm_amdgpu_fence_to_handle)
> >  #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE +
> > DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE +
> > DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >  /**
> >   * DOC: memory domains
> > @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
> > union drm_amdgpu_ctx_out out;
> >  };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE 1
> > +#define AMDGPU_USERQ_OP_FREE   2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > +   __u32   flags;
> > +   /** IP type: AMDGPU_HW_IP_* */
> > +   __u32   ip_type;
> > +   /** GEM object handle */
> > +   __u32   doorbell_handle;
> > +   /** Doorbell offset in dwords */
> > +   __u32   doorbell_offset;
>  Since doorbells are 64 bit, maybe this offset should be in qwords.
> >>> Can you please help to cross check this information ? All the
> >>> existing
> >>> kernel doorbell calculations are keeping doorbells size as
> >>> sizeof(u32)
> >> Doorbells on pre-vega hardware are 32 bits so that is where that comes
> >> from, but from vega onward most doorbells are 64 bit.  I think some
> >> versions of VCN may still use 32 bit doorbells.  Internally in the
> >> kernel driver we just use two slots for newer hardware, but for the
> >> UAPI, I think we can just stick with 64 bit slots to avoid confusion.
> >> Even if an engine only uses a 32 bit one, I don't know that there is
> >> much value to trying to support variable doorbell sizes.
> > I think we can stick with using __u32 because this is *not* the size of
> > the doorbell entries.
> >
> > Instead this is the offset into the BO where to find the doorbell for
> > this queue (which then in turn is 64bits wide).
> >
> > Since we will probably never have more than 4GiB doorbells we should be
> > pretty save to use 32bits here.
>  Yes, the offset would still be 32 bits, but the units 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-07 Thread Christian König

Am 07.02.23 um 15:07 schrieb Alex Deucher:

On Tue, Feb 7, 2023 at 2:38 AM Shashank Sharma  wrote:


On 07/02/2023 08:03, Christian König wrote:

Am 06.02.23 um 22:03 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:

Am 06.02.23 um 17:56 schrieb Alex Deucher:

On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma
 wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma
 wrote:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 include/uapi/drm/amdgpu_drm.h | 53
+++
 1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
 #define DRM_AMDGPU_VM  0x13
 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
 #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

 #define DRM_IOCTL_AMDGPU_GEM_CREATE
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
+ DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE +
DRM_AMDGPU_VM, union drm_amdgpu_vm)
 #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union
drm_amdgpu_fence_to_handle)
 #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE +
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE +
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

 /**
  * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
 };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.

Can you please help to cross check this information ? All the
existing
kernel doorbell calculations are keeping doorbells size as
sizeof(u32)

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

I think we can stick with using __u32 because this is *not* the size of
the doorbell entries.

Instead this is the offset into the BO where to find the doorbell for
this queue (which then in turn is 64bits wide).

Since we will probably never have more than 4GiB doorbells we should be
pretty safe to use 32bits here.

Yes, the offset would still be 32 bits, but the units would be
qwords.  E.g.,

+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.

Ah, so you only wanted to fix the comment. That was absolutely not
clear from the discussion.

If I understand this correctly, the offset of the doorbell in the BO is
still 32-bit, but its width (size in bytes) is 64 bits. Am I getting
that right ?

Right.  Each doorbell is 64 bits (8 bytes) so this value would
basically be an index into the doorbell bo.  Having it be a 64 bit
index rather than a 32 bit index would avoid the possibility of users
specifying overlapping doorbells.  E.g.,
offset in bytes
0 - doorbell
4 - doorbell
Would be incorrect, while
offset in bytes
0 - doorbell
8 - doorbell
Would be correct.

I.e., u64 doorbell_page[512] vs u32 doorbell_page[1024]


Well I usually prefer just straight byte offsets, but I think the main 
question is what does the underlying hw/fw use?


If that's a dword index we should probably stick with that in the UAPI 
as well. If it's in qword then stick to that, if it's in bytes then use
that.


Otherwise we will just confuse 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-07 Thread Alex Deucher
On Tue, Feb 7, 2023 at 2:38 AM Shashank Sharma  wrote:
>
>
> On 07/02/2023 08:03, Christian König wrote:
> > Am 06.02.23 um 22:03 schrieb Alex Deucher:
> >> On Mon, Feb 6, 2023 at 12:01 PM Christian König
> >>  wrote:
> >>> Am 06.02.23 um 17:56 schrieb Alex Deucher:
>  On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma
>   wrote:
> > Hey Alex,
> >
> > On 03/02/2023 23:07, Alex Deucher wrote:
> >> On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma
> >>  wrote:
> >>> From: Alex Deucher 
> >>>
> >>> This patch intorduces new UAPI/IOCTL for usermode graphics
> >>> queue. The userspace app will fill this structure and request
> >>> the graphics driver to add a graphics work queue for it. The
> >>> output of this UAPI is a queue id.
> >>>
> >>> This UAPI maps the queue into GPU, so the graphics app can start
> >>> submitting work to the queue as soon as the call returns.
> >>>
> >>> Cc: Alex Deucher 
> >>> Cc: Christian Koenig 
> >>> Signed-off-by: Alex Deucher 
> >>> Signed-off-by: Shashank Sharma 
> >>> ---
> >>> include/uapi/drm/amdgpu_drm.h | 53
> >>> +++
> >>> 1 file changed, 53 insertions(+)
> >>>
> >>> diff --git a/include/uapi/drm/amdgpu_drm.h
> >>> b/include/uapi/drm/amdgpu_drm.h
> >>> index 4038abe8505a..6c5235d107b3 100644
> >>> --- a/include/uapi/drm/amdgpu_drm.h
> >>> +++ b/include/uapi/drm/amdgpu_drm.h
> >>> @@ -54,6 +54,7 @@ extern "C" {
> >>> #define DRM_AMDGPU_VM  0x13
> >>> #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> >>> #define DRM_AMDGPU_SCHED   0x15
> >>> +#define DRM_AMDGPU_USERQ   0x16
> >>>
> >>> #define DRM_IOCTL_AMDGPU_GEM_CREATE
> >>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
> >>> drm_amdgpu_gem_create)
> >>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE
> >>> + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >>> @@ -71,6 +72,7 @@ extern "C" {
> >>> #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE +
> >>> DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >>> #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE
> >>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union
> >>> drm_amdgpu_fence_to_handle)
> >>> #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE +
> >>> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >>> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE +
> >>> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >>>
> >>> /**
> >>>  * DOC: memory domains
> >>> @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
> >>>union drm_amdgpu_ctx_out out;
> >>> };
> >>>
> >>> +/* user queue IOCTL */
> >>> +#define AMDGPU_USERQ_OP_CREATE 1
> >>> +#define AMDGPU_USERQ_OP_FREE   2
> >>> +
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> >>> +
> >>> +struct drm_amdgpu_userq_mqd {
> >>> +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> >>> +   __u32   flags;
> >>> +   /** IP type: AMDGPU_HW_IP_* */
> >>> +   __u32   ip_type;
> >>> +   /** GEM object handle */
> >>> +   __u32   doorbell_handle;
> >>> +   /** Doorbell offset in dwords */
> >>> +   __u32   doorbell_offset;
> >> Since doorbells are 64 bit, maybe this offset should be in qwords.
> > Can you please help to cross check this information ? All the
> > existing
> > kernel doorbell calculations are keeping doorbells size as
> > sizeof(u32)
>  Doorbells on pre-vega hardware are 32 bits so that is where that comes
>  from, but from vega onward most doorbells are 64 bit.  I think some
>  versions of VCN may still use 32 bit doorbells.  Internally in the
>  kernel driver we just use two slots for newer hardware, but for the
>  UAPI, I think we can just stick with 64 bit slots to avoid confusion.
>  Even if an engine only uses a 32 bit one, I don't know that there is
>  much value to trying to support variable doorbell sizes.
> >>> I think we can stick with using __u32 because this is *not* the size of
> >>> the doorbell entries.
> >>>
> >>> Instead this is the offset into the BO where to find the doorbell for
> >>> this queue (which then in turn is 64bits wide).
> >>>
> >>> Since we will probably never have more than 4GiB doorbells we should be
> >>> pretty save to use 32bits here.
> >> Yes, the offset would still be 32 bits, but the units would be
> >> qwords.  E.g.,
> >>
> >> +   /** Doorbell offset in qwords */
> >> +   __u32   doorbell_offset;
> >>
> >> That way you couldn't accidently specify an overlapping doorbell.
> >
> > Ah, so you only wanted to fix the comment. That was absolutely not
> > clear from the discussion.
>
> If I understand this 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Shashank Sharma



On 07/02/2023 08:03, Christian König wrote:

Am 06.02.23 um 22:03 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:

Am 06.02.23 um 17:56 schrieb Alex Deucher:
On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma 
 wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:
On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma 
 wrote:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
    include/uapi/drm/amdgpu_drm.h | 53 
+++

    1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
    #define DRM_AMDGPU_VM  0x13
    #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
    #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

    #define DRM_IOCTL_AMDGPU_GEM_CREATE 
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union 
drm_amdgpu_gem_create)
    #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE 
+ DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)

@@ -71,6 +72,7 @@ extern "C" {
    #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
    #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE 
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union 
drm_amdgpu_fence_to_handle)
    #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)


    /**
 * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
   union drm_amdgpu_ctx_out out;
    };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.
Can you please help to cross check this information ? All the 
existing
kernel doorbell calculations are keeping doorbells size as 
sizeof(u32)

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

I think we can stick with using __u32 because this is *not* the size of
the doorbell entries.

Instead this is the offset into the BO where to find the doorbell for
this queue (which then in turn is 64bits wide).

Since we will probably never have more than 4GiB doorbells we should be
pretty safe to use 32bits here.
Yes, the offset would still be 32 bits, but the units would be 
qwords.  E.g.,


+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.


Ah, so you only wanted to fix the comment. That was absolutely not 
clear from the discussion.


If I understand this correctly, the offset of the doorbell in the BO is 
still 32-bit, but its width (size in bytes) is 64 bits. Am I getting
that right ?


- Shashank



Christian.



Alex


Christian.


Alex


+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+    * unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;

Maybe this should be queue_id to 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Christian König

Am 06.02.23 um 22:03 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:

Am 06.02.23 um 17:56 schrieb Alex Deucher:

On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  wrote:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
include/uapi/drm/amdgpu_drm.h | 53 +++
1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
#define DRM_AMDGPU_VM  0x13
#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
#define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

#define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

/**
 * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
   union drm_amdgpu_ctx_out out;
};

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.

Can you please help to cross check this information ? All the existing
kernel doorbell calculations are keeping doorbells size as sizeof(u32)

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

I think we can stick with using __u32 because this is *not* the size of
the doorbell entries.

Instead this is the offset into the BO where to find the doorbell for
this queue (which then in turn is 64bits wide).

Since we will probably never have more than 4GiB doorbells we should be
pretty safe to use 32 bits here.

Yes, the offset would still be 32 bits, but the units would be qwords.  E.g.,

+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.


Ah, so you only wanted to fix the comment. That was absolutely not clear 
from the discussion.


Christian.



Alex


Christian.


Alex


+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+* unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;

Maybe this should be queue_id to match the input.

Agree.

- Shashank


Alex


+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:
>
> Am 06.02.23 um 17:56 schrieb Alex Deucher:
> > On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  
> > wrote:
> >> Hey Alex,
> >>
> >> On 03/02/2023 23:07, Alex Deucher wrote:
> >>> On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  
> >>> wrote:
>  From: Alex Deucher 
> 
>  This patch introduces new UAPI/IOCTL for usermode graphics
>  queue. The userspace app will fill this structure and request
>  the graphics driver to add a graphics work queue for it. The
>  output of this UAPI is a queue id.
> 
>  This UAPI maps the queue into GPU, so the graphics app can start
>  submitting work to the queue as soon as the call returns.
> 
>  Cc: Alex Deucher 
>  Cc: Christian Koenig 
>  Signed-off-by: Alex Deucher 
>  Signed-off-by: Shashank Sharma 
>  ---
> include/uapi/drm/amdgpu_drm.h | 53 +++
> 1 file changed, 53 insertions(+)
> 
>  diff --git a/include/uapi/drm/amdgpu_drm.h 
>  b/include/uapi/drm/amdgpu_drm.h
>  index 4038abe8505a..6c5235d107b3 100644
>  --- a/include/uapi/drm/amdgpu_drm.h
>  +++ b/include/uapi/drm/amdgpu_drm.h
>  @@ -54,6 +54,7 @@ extern "C" {
> #define DRM_AMDGPU_VM  0x13
> #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> #define DRM_AMDGPU_SCHED   0x15
>  +#define DRM_AMDGPU_USERQ   0x16
> 
> #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>  @@ -71,6 +72,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_VM, union drm_amdgpu_vm)
> #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
>  +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> 
> /**
>  * DOC: memory domains
>  @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
>    union drm_amdgpu_ctx_out out;
> };
> 
>  +/* user queue IOCTL */
>  +#define AMDGPU_USERQ_OP_CREATE 1
>  +#define AMDGPU_USERQ_OP_FREE   2
>  +
>  +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
>  +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
>  +
>  +struct drm_amdgpu_userq_mqd {
>  +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
>  +   __u32   flags;
>  +   /** IP type: AMDGPU_HW_IP_* */
>  +   __u32   ip_type;
>  +   /** GEM object handle */
>  +   __u32   doorbell_handle;
>  +   /** Doorbell offset in dwords */
>  +   __u32   doorbell_offset;
> >>> Since doorbells are 64 bit, maybe this offset should be in qwords.
> >> Can you please help to cross check this information ? All the existing
> >> kernel doorbell calculations are keeping doorbells size as sizeof(u32)
> > Doorbells on pre-vega hardware are 32 bits so that is where that comes
> > from, but from vega onward most doorbells are 64 bit.  I think some
> > versions of VCN may still use 32 bit doorbells.  Internally in the
> > kernel driver we just use two slots for newer hardware, but for the
> > UAPI, I think we can just stick with 64 bit slots to avoid confusion.
> > Even if an engine only uses a 32 bit one, I don't know that there is
> > much value to trying to support variable doorbell sizes.
>
> I think we can stick with using __u32 because this is *not* the size of
> the doorbell entries.
>
> Instead this is the offset into the BO where to find the doorbell for
> this queue (which then in turn is 64bits wide).
>
> Since we will probably never have more than 4GiB doorbells we should be
> pretty safe to use 32 bits here.

Yes, the offset would still be 32 bits, but the units would be qwords.  E.g.,

+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.

Alex

>
> Christian.
>
> >
> > Alex
> >
>  +   /** GPU virtual address of the queue */
>  +   __u64   queue_va;
>  +   /** Size of the queue in bytes */
>  +   __u64   queue_size;
>  +   /** GPU virtual address of the rptr */
>  +   __u64   rptr_va;
>  +   /** GPU virtual address of the wptr */
>  +   __u64   wptr_va;
>  +};
>  +
>  +struct drm_amdgpu_userq_in {
>  +   /** AMDGPU_USERQ_OP_* */
>  +   __u32   op;
>  +   /** Flags */
>  +   __u32   flags;
>  +   /** Queue handle to associate the queue 

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Christian König

Am 06.02.23 um 17:56 schrieb Alex Deucher:

On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  wrote:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
   include/uapi/drm/amdgpu_drm.h | 53 +++
   1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
   #define DRM_AMDGPU_VM  0x13
   #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
   #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

   #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
   #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
   #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
   #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

   /**
* DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
  union drm_amdgpu_ctx_out out;
   };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.

Can you please help to cross check this information ? All the existing
kernel doorbell calculations are keeping doorbells size as sizeof(u32)

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.


I think we can stick with using __u32 because this is *not* the size of 
the doorbell entries.


Instead this is the offset into the BO where to find the doorbell for 
this queue (which then in turn is 64bits wide).


Since we will probably never have more than 4GiB doorbells we should be
pretty safe to use 32 bits here.


Christian.



Alex


+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+* unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;

Maybe this should be queue_id to match the input.

Agree.

- Shashank


Alex


+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out out;
+};
+
   /* vm ioctl */
   #define AMDGPU_VM_OP_RESERVE_VMID  1
   #define AMDGPU_VM_OP_UNRESERVE_VMID2
--
2.34.1





Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Alex Deucher
On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  wrote:
>
> Hey Alex,
>
> On 03/02/2023 23:07, Alex Deucher wrote:
> > On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  
> > wrote:
> >> From: Alex Deucher 
> >>
> >> This patch introduces new UAPI/IOCTL for usermode graphics
> >> queue. The userspace app will fill this structure and request
> >> the graphics driver to add a graphics work queue for it. The
> >> output of this UAPI is a queue id.
> >>
> >> This UAPI maps the queue into GPU, so the graphics app can start
> >> submitting work to the queue as soon as the call returns.
> >>
> >> Cc: Alex Deucher 
> >> Cc: Christian Koenig 
> >> Signed-off-by: Alex Deucher 
> >> Signed-off-by: Shashank Sharma 
> >> ---
> >>   include/uapi/drm/amdgpu_drm.h | 53 +++
> >>   1 file changed, 53 insertions(+)
> >>
> >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >> index 4038abe8505a..6c5235d107b3 100644
> >> --- a/include/uapi/drm/amdgpu_drm.h
> >> +++ b/include/uapi/drm/amdgpu_drm.h
> >> @@ -54,6 +54,7 @@ extern "C" {
> >>   #define DRM_AMDGPU_VM  0x13
> >>   #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> >>   #define DRM_AMDGPU_SCHED   0x15
> >> +#define DRM_AMDGPU_USERQ   0x16
> >>
> >>   #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >>   #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >> @@ -71,6 +72,7 @@ extern "C" {
> >>   #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >>   #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >>
> >>   /**
> >>* DOC: memory domains
> >> @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
> >>  union drm_amdgpu_ctx_out out;
> >>   };
> >>
> >> +/* user queue IOCTL */
> >> +#define AMDGPU_USERQ_OP_CREATE 1
> >> +#define AMDGPU_USERQ_OP_FREE   2
> >> +
> >> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> >> +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> >> +
> >> +struct drm_amdgpu_userq_mqd {
> >> +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> >> +   __u32   flags;
> >> +   /** IP type: AMDGPU_HW_IP_* */
> >> +   __u32   ip_type;
> >> +   /** GEM object handle */
> >> +   __u32   doorbell_handle;
> >> +   /** Doorbell offset in dwords */
> >> +   __u32   doorbell_offset;
> > Since doorbells are 64 bit, maybe this offset should be in qwords.
>
> Can you please help to cross check this information ? All the existing
> kernel doorbell calculations are keeping doorbells size as sizeof(u32)

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

Alex

>
> >
> >> +   /** GPU virtual address of the queue */
> >> +   __u64   queue_va;
> >> +   /** Size of the queue in bytes */
> >> +   __u64   queue_size;
> >> +   /** GPU virtual address of the rptr */
> >> +   __u64   rptr_va;
> >> +   /** GPU virtual address of the wptr */
> >> +   __u64   wptr_va;
> >> +};
> >> +
> >> +struct drm_amdgpu_userq_in {
> >> +   /** AMDGPU_USERQ_OP_* */
> >> +   __u32   op;
> >> +   /** Flags */
> >> +   __u32   flags;
> >> +   /** Queue handle to associate the queue free call with,
> >> +* unused for queue create calls */
> >> +   __u32   queue_id;
> >> +   __u32   pad;
> >> +   /** Queue descriptor */
> >> +   struct drm_amdgpu_userq_mqd mqd;
> >> +};
> >> +
> >> +struct drm_amdgpu_userq_out {
> >> +   /** Queue handle */
> >> +   __u32   q_id;
> > Maybe this should be queue_id to match the input.
>
> Agree.
>
> - Shashank
>
> > Alex
> >
> >> +   /** Flags */
> >> +   __u32   flags;
> >> +};
> >> +
> >> +union drm_amdgpu_userq {
> >> +   struct drm_amdgpu_userq_in in;
> >> +   struct drm_amdgpu_userq_out out;
> >> +};
> >> +
> >>   /* vm ioctl */
> >>   #define AMDGPU_VM_OP_RESERVE_VMID  1
> >>   #define AMDGPU_VM_OP_UNRESERVE_VMID2
> >> --
> >> 2.34.1
> >>


Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-03 Thread Shashank Sharma

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  wrote:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 53 +++
  1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
  #define DRM_AMDGPU_VM  0x13
  #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
  #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

  #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
  #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
  #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
  #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

  /**
   * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
 union drm_amdgpu_ctx_out out;
  };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.


Can you please help to cross check this information ? All the existing 
kernel doorbell calculations are keeping doorbells size as sizeof(u32)





+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+* unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;

Maybe this should be queue_id to match the input.


Agree.

- Shashank


Alex


+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out out;
+};
+
  /* vm ioctl */
  #define AMDGPU_VM_OP_RESERVE_VMID  1
  #define AMDGPU_VM_OP_UNRESERVE_VMID2
--
2.34.1



Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-03 Thread Alex Deucher
On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  wrote:
>
> From: Alex Deucher 
>
> This patch introduces new UAPI/IOCTL for usermode graphics
> queue. The userspace app will fill this structure and request
> the graphics driver to add a graphics work queue for it. The
> output of this UAPI is a queue id.
>
> This UAPI maps the queue into GPU, so the graphics app can start
> submitting work to the queue as soon as the call returns.
>
> Cc: Alex Deucher 
> Cc: Christian Koenig 
> Signed-off-by: Alex Deucher 
> Signed-off-by: Shashank Sharma 
> ---
>  include/uapi/drm/amdgpu_drm.h | 53 +++
>  1 file changed, 53 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 4038abe8505a..6c5235d107b3 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -54,6 +54,7 @@ extern "C" {
>  #define DRM_AMDGPU_VM  0x13
>  #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
>  #define DRM_AMDGPU_SCHED   0x15
> +#define DRM_AMDGPU_USERQ   0x16
>
>  #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>  #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +72,7 @@ extern "C" {
>  #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_VM, union drm_amdgpu_vm)
>  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>  #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>
>  /**
>   * DOC: memory domains
> @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
> union drm_amdgpu_ctx_out out;
>  };
>
> +/* user queue IOCTL */
> +#define AMDGPU_USERQ_OP_CREATE 1
> +#define AMDGPU_USERQ_OP_FREE   2
> +
> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> +
> +struct drm_amdgpu_userq_mqd {
> +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> +   __u32   flags;
> +   /** IP type: AMDGPU_HW_IP_* */
> +   __u32   ip_type;
> +   /** GEM object handle */
> +   __u32   doorbell_handle;
> +   /** Doorbell offset in dwords */
> +   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.


> +   /** GPU virtual address of the queue */
> +   __u64   queue_va;
> +   /** Size of the queue in bytes */
> +   __u64   queue_size;
> +   /** GPU virtual address of the rptr */
> +   __u64   rptr_va;
> +   /** GPU virtual address of the wptr */
> +   __u64   wptr_va;
> +};
> +
> +struct drm_amdgpu_userq_in {
> +   /** AMDGPU_USERQ_OP_* */
> +   __u32   op;
> +   /** Flags */
> +   __u32   flags;
> +   /** Queue handle to associate the queue free call with,
> +* unused for queue create calls */
> +   __u32   queue_id;
> +   __u32   pad;
> +   /** Queue descriptor */
> +   struct drm_amdgpu_userq_mqd mqd;
> +};
> +
> +struct drm_amdgpu_userq_out {
> +   /** Queue handle */
> +   __u32   q_id;

Maybe this should be queue_id to match the input.

Alex

> +   /** Flags */
> +   __u32   flags;
> +};
> +
> +union drm_amdgpu_userq {
> +   struct drm_amdgpu_userq_in in;
> +   struct drm_amdgpu_userq_out out;
> +};
> +
>  /* vm ioctl */
>  #define AMDGPU_VM_OP_RESERVE_VMID  1
>  #define AMDGPU_VM_OP_UNRESERVE_VMID2
> --
> 2.34.1
>


[PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-03 Thread Shashank Sharma
From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 include/uapi/drm/amdgpu_drm.h | 53 +++
 1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
 #define DRM_AMDGPU_VM  0x13
 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
 #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
 #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
 #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
 
 /**
  * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
 };
 
+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;
+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+* unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;
+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out out;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID  1
 #define AMDGPU_VM_OP_UNRESERVE_VMID2
-- 
2.34.1