On 02/07/2016 05:56 AM, Ilia Mirkin wrote:
On Sat, Feb 6, 2016 at 6:13 PM, Ilia Mirkin <[email protected]> wrote:
On Sat, Feb 6, 2016 at 5:38 PM, Samuel Pitoiset
<[email protected]> wrote:
When indirect compute is used, the size of the grid (in blocks) is
stored as three integers inside a buffer. This requires a macro to
set up GRIDDIM_YX and GRIDDIM_Z.

Signed-off-by: Samuel Pitoiset <[email protected]>
---
  src/gallium/drivers/nouveau/nvc0/mme/Makefile      |  2 +-
  src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme   | 19 +++++++++++++++++++
  src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h | 13 +++++++++++++
  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 18 +++++++++++++++---
  src/gallium/drivers/nouveau/nvc0/nvc0_macros.h     |  2 ++
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  2 ++
  6 files changed, 52 insertions(+), 4 deletions(-)
  create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
  create mode 100644 src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h

diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile 
b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
index 1c0f583..52fb0a5 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile
+++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
@@ -1,5 +1,5 @@
  ENVYAS?=envyas
-TARGETS=com9097.mme.h
+TARGETS=com9097.mme.h com90c0.mme.h

  all: $(TARGETS)

diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme 
b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
new file mode 100644
index 0000000..ee7f726
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
@@ -0,0 +1,19 @@
+/* NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT
+ *
+ * arg     = num_groups_x
+ * parm[0] = num_groups_y
+ * parm[1] = num_groups_z
+ */
+.section #mme90c0_launch_grid_indirect
+   parm $r2
+   parm $r3
+   mov $r4 (or $r1 $r2)
+   mov $r4 (or $r3 $r4)
+   braz $r4 #fail
+   maddr 0x108e /* GRIDDIM_YX */

You can move this up, e.g.

parm $r2 maddr 0x108e /* GRIDDIM_YX */

+   mov $r4 (extrshl $r2 $r0 0x10 0x10)

If you make this

(extrinsrt $r1 $r2 0x0 0x10 0x10)

Oh and even better, do this as part of the computation that precedes
the braz, that way you save another op :)

Hmm, how can this still be reduced? Currently I have:

.section #mme90c0_launch_grid_indirect
   parm $r2 maddr 0x108e /* GRIDDIM_YX */
   parm $r3
   mov $r4 (or $r1 $r2)
   mov $r4 (or $r3 $r4)
   braz $r4 #fail
exit send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* (num_groups_y << 16) | num_groups_x */
   send $r3
fail:
   nop
   exit



then you can make it directly an argument to send, avoiding the separate or.

+   exit send (or $r4 $r1) /* (num_groups_y << 16) | num_groups_x */
+   send $r3
+fail:
+   exit

I think you need a nop here.

+
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h 
b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
new file mode 100644

I think Emil is going to yell at you about not adding this file to
some list somewhere so that make dist picks it up.

index 0000000..89076cf
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
@@ -0,0 +1,13 @@
+uint32_t mme90c0_launch_grid_indirect[] = {
+       0x00000201,
+       0x00000301,
+/* 0x0009: fail */
+       0x00128c10,
+       0x00131c10,
+       0x00016007,
+       0x04238021,
+       0x84008413,
+       0x001260c0,
+       0x00001841,
+       0x00000091,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index e63bdcb..dbf2148 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -452,9 +452,21 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct 
pipe_grid_info *info)
     PUSH_DATA (push, cp->num_gprs);

     /* grid/block setup */
-   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
-   PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
-   PUSH_DATA (push, info->grid[2]);
+   if (unlikely(info->indirect)) {
+      struct nv04_resource *res = nv04_resource(info->indirect);
+      uint32_t offset = res->offset + info->indirect_offset;
+      unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT;
+
+      nouveau_pushbuf_space(push, 16, 0, 1);
+      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
+      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
+      nouveau_pushbuf_data(push, res->bo, offset,
+                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
+   } else {
+      BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
+      PUSH_DATA (push, info->grid[2]);
+   }
     BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
     PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
     PUSH_DATA (push, info->block[2]);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index 49e176c..57262fe 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -35,4 +35,6 @@

  #define NVC0_3D_MACRO_QUERY_BUFFER_WRITE                       0x00003858

+#define NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT       0x00003860
+
  #endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 84e4253..85be1cc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -36,6 +36,7 @@
  #include "nvc0/nvc0_screen.h"

  #include "nvc0/mme/com9097.mme.h"
+#include "nvc0/mme/com90c0.mme.h"

  static boolean
  nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -1053,6 +1054,7 @@ nvc0_screen_create(struct nouveau_device *dev)
     MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, 
mme9097_draw_arrays_indirect_count);
     MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, 
mme9097_draw_elts_indirect_count);
     MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+   MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, 
mme90c0_launch_grid_indirect);

     BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
     PUSH_DATA (push, 1);
--
2.6.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to