Some variants of these instructions have no legacy version, and we do not
prevent those variants from being selected. As such, I think this
approach is doomed.

Not for inclusion upstream.
---

On top of everything, this is totally untested. I am also unsure I got the
instruction patching logic right - I was just going off of a brief glance at
the GCN docs.

 src/gallium/drivers/radeonsi/si_pipe.c   |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 53 +++++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index b41e5da..a49ebca 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -414,6 +414,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
        case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
        case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
         case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+       case PIPE_CAP_TGSI_MUL_ZERO_WINS:
                return 1;
 
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -465,7 +466,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
        case PIPE_CAP_MAX_WINDOW_RECTANGLES:
        case PIPE_CAP_NATIVE_FENCE_FD:
        case PIPE_CAP_TGSI_FS_FBFETCH:
-       case PIPE_CAP_TGSI_MUL_ZERO_WINS:
                return 0;
 
        case PIPE_CAP_QUERY_BUFFER_OBJECT:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5ca974e..fece98f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6132,6 +6132,57 @@ static unsigned si_get_shader_binary_size(struct 
si_shader *shader)
        return size;
 }
 
+/**
+ * Copy the main shader part's machine code to \p ptr as little-endian
+ * dwords. If the shader sets TGSI_PROPERTY_MUL_ZERO_WINS, multiplies are
+ * binary-patched into their "legacy" variants on the way; otherwise the
+ * code is copied unchanged.
+ *
+ * \param shader  shader whose main binary (shader->binary) is copied
+ * \param ptr     destination for shader->binary.code_size bytes
+ *
+ * Patched opcodes (the legacy opcode is always the regular opcode minus 1):
+ *   V_MUL_F32 (GCN1: 0x8,   GCN3: 0x5)   -> V_MUL_LEGACY_F32 (GCN1: 0x7,
+ *                                            GCN3: 0x4)
+ *   V_MAD_F32 (GCN1: 0x141, GCN3: 0x1c1) -> V_MAD_LEGACY_F32 (GCN1: 0x140,
+ *                                            GCN3: 0x1c0)
+ * V_MUL_F32 is matched in its VOP2 form and in its VOP3a form
+ * (0x100 + VOP2 opcode).
+ *
+ * TODO: Also replace V_MAC_F32? The legacy variant is gone on GCN3.
+ *
+ * NOTE(review): instruction lengths are only approximated. A dword with
+ * bit 31 clear is treated as a 4-byte VOP2-style instruction and everything
+ * else as an 8-byte instruction. Per the GCN ISA docs the scalar (SOP*)
+ * encodings are 4 bytes, and 4-byte encodings may be followed by a 32-bit
+ * literal constant, so the stream can be mis-segmented: instructions may be
+ * skipped, or a literal may be mistaken for an instruction. Confirm before
+ * relying on this.
+ */
+static void si_shader_binary_upload_patch(struct si_shader *shader,
+                                          unsigned *ptr)
+{
+       const struct radeon_shader_binary *mainb = &shader->binary;
+
+       if (!shader->selector->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) {
+               util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
+               return;
+       }
+
+       const int pre_vi = shader->selector->screen->b.chip_class < VI;
+       const unsigned mul_op = pre_vi ? 0x8 : 0x5;
+       const unsigned mad_op = pre_vi ? 0x141 : 0x1c1;
+
+       /* VOP2: OP is the 6-bit field [30:25]. Bit 24 already belongs to
+        * VDST and must be preserved when the opcode is rewritten.
+        */
+       const unsigned vop2_op_shift = 25;
+       const unsigned vop2_op_mask = 0x3f;
+
+       /* VOP3a: OP is [25:17] (9 bits) before VI and [25:16] (10 bits)
+        * from VI on.
+        */
+       const unsigned vop3_op_shift = pre_vi ? 17 : 16;
+       const unsigned vop3_op_mask = pre_vi ? 0x1ff : 0x3ff;
+
+       const unsigned *code = (const unsigned *)mainb->code;
+       const unsigned num_dwords = mainb->code_size / 4;
+       unsigned i;
+
+       for (i = 0; i < num_dwords; i++) {
+               unsigned dw = code[i];
+               int has_second_dword = 1;
+
+               if ((dw & (1U << 31)) == 0) {
+                       /* 4-byte VOP2-style encoding. */
+                       has_second_dword = 0;
+
+                       /* Subtracting one unit of the opcode field turns
+                        * the opcode into its legacy variant (opcode - 1)
+                        * without touching any other field; the matched
+                        * opcodes are non-zero, so there is no borrow.
+                        */
+                       if (((dw >> vop2_op_shift) & vop2_op_mask) == mul_op)
+                               dw -= 1U << vop2_op_shift;
+               } else if ((dw & 0xfc000000) == 0xd0000000) {
+                       /* VOP3a: encoding bits [31:26] == 110100. */
+                       unsigned opcode = (dw >> vop3_op_shift) & vop3_op_mask;
+
+                       if (opcode == mad_op || opcode == 0x100 + mul_op)
+                               dw -= 1U << vop3_op_shift;
+               }
+
+               ptr[i] = util_cpu_to_le32(dw);
+
+               /* Pass the second dword of an 8-byte instruction through
+                * unmodified. The bounds check keeps a stream that ends on
+                * a 4-byte instruction from being read and written one
+                * dword past code_size.
+                */
+               if (has_second_dword && i + 1 < num_dwords) {
+                       i++;
+                       ptr[i] = util_cpu_to_le32(code[i]);
+               }
+       }
+}
+
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader)
 {
        const struct radeon_shader_binary *prolog =
@@ -6163,7 +6214,7 @@ int si_shader_binary_upload(struct si_screen *sscreen, 
struct si_shader *shader)
                ptr += prolog->code_size;
        }
 
-       util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
+       si_shader_binary_upload_patch(shader, (unsigned *)ptr);
        ptr += mainb->code_size;
 
        if (epilog)
-- 
2.10.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to