On 09/20/2016 12:16 AM, Ilia Mirkin wrote:
On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
This instruction is available since SM20 (Fermi) and allow to do
(a << b) + c in one shot. In some situations, IMAD should be
replaced by SHLADD when b is a power of 2, and ADD+SHL should be
replaced by SHLADD as well.

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h                | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp        | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp       | 6 +++---
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 ++++
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp  | 7 +++++--
 6 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index d6011d9..bedbdcc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -57,6 +57,7 @@ enum operation
    OP_MAD,
    OP_FMA,
    OP_SAD, // abs(src0 - src1) + src2
+   OP_SHLADD,
    OP_ABS,
    OP_NEG,
    OP_NOT,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 22f2f5d..dbd0f7d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
    "mad",
    "fma",
    "sad",
+   "shladd",
    "abs",
    "neg",
    "not",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 7d7b315..273ec34 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] =
    0, 0,                   // NOP, PHI
    0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
    1, 1, 2,                // MOV, LOAD, STORE
-   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
+   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
    1, 1, 1,                // ABS, NEG, NOT
    2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
    2, 2, 1,                // MAX, MIN, SAT
@@ -70,10 +70,10 @@ const OpClass Target::operationClass[] =
    OPCLASS_MOVE,
    OPCLASS_LOAD,
    OPCLASS_STORE,
-   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
+   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
    OPCLASS_ARITH, OPCLASS_ARITH,
-   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
    // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
    OPCLASS_CONVERT, OPCLASS_CONVERT,
    OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index 6b8f767..cf8a08f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
    case OP_DIV:
    case OP_MOD:
       return false;
+   case OP_SHLADD:
+      if (isFloatType(ty))
+         return false;
+      break;
    default:
       break;
    }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index b37ea73..5ab95fc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
    case OP_EXTBF:
    case OP_EXIT: // want exit modifier instead (on NOP if required)
    case OP_MEMBAR:
+   case OP_SHLADD:
       return false;
    case OP_SAD:
       return ty == TYPE_S32;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index f75e395..d8fa285 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -105,6 +105,7 @@ static const struct opProperties _initProps[] =
    { OP_MAX,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
    { OP_MIN,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
    { OP_MAD,    0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
+   { OP_SHLADD, 0x3, 0x0, 0x0, 0x0, 0x4, 0x6 },
    { OP_MADSP,  0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
    { OP_ABS,    0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
    { OP_NEG,    0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
@@ -158,13 +159,13 @@ void TargetNVC0::initOpInfo()
    {
       // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR,
       // SET, SELP, SLCT
-      0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
+      0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000
    };

    static const uint32_t shortForm[(OP_LAST + 31) / 32] =
    {
       // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN
-      0x0670ca00, 0x00000000, 0x00000000, 0x00000000
+      0x0ce0ca00, 0x00000000, 0x00000000, 0x00000000
    };

I think you forgot to adjust these bitmasks for nv50...

Correct..



    static const operation noDest[] =
@@ -417,6 +418,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
       return false;
    if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
       return false;
+   if (op == OP_SHLADD && isFloatType(ty))
+      return false;
    return true;
 }

--
2.10.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to