This instruction was introduced with SM50 (Maxwell) and performs an addition with three sources. Unfortunately, it only supports integer operands.
v3: - set commutative flag for OP_ADD3 - move OP_ADD3 after arithmetic ops Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 6 +++--- src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 ++++ src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 12 ++++++++---- 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index d6011d9..12a8b10 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -57,6 +57,7 @@ enum operation OP_MAD, OP_FMA, OP_SAD, // abs(src0 - src1) + src2 + OP_ADD3, OP_ABS, OP_NEG, OP_NOT, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 22f2f5d..83340f2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] = "mad", "fma", "sad", + "add3", "abs", "neg", "not", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 7d7b315..dcf35ba 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] = 0, 0, // NOP, PHI 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 1, 1, 2, // MOV, LOAD, STORE - 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD + 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, ADD3 1, 1, 1, // ABS, NEG, NOT 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 2, 2, 1, // MAX, MIN, SAT @@ 
-70,10 +70,10 @@ const OpClass Target::operationClass[] = OPCLASS_MOVE, OPCLASS_LOAD, OPCLASS_STORE, - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, ADD3 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp index 6b8f767..eecd61f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp @@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const case OP_DIV: case OP_MOD: return false; + case OP_ADD3: + if (isFloatType(ty)) + return false; + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index b37ea73..e1a7963 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const case OP_EXTBF: case OP_EXIT: // want exit modifier instead (on NOP if required) case OP_MEMBAR: + case OP_ADD3: return false; case OP_SAD: return ty == TYPE_S32; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index f5981de..a927c1e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -147,7 +147,9 @@ static const struct opProperties _initProps[] = { OP_SUSTP, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 }, { OP_SUCLAMP, 0x0, 0x0, 0x0, 0x0, 
0x2, 0x2 }, { OP_SUBFM, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, - { OP_SUEAU, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 } + { OP_SUEAU, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, + // gm107 ops: + { OP_ADD3, 0x7, 0x0, 0x0, 0x0, 0x2, 0x2 }, }; void TargetNVC0::initOpInfo() @@ -156,14 +158,14 @@ void TargetNVC0::initOpInfo() static const uint32_t commutative[(OP_LAST + 31) / 32] = { - // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN - 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 + // ADD, MAD, MUL, ADD3, AND, OR, XOR, MAX, MIN + 0x0ce2ca00, 0x0000007e, 0x00000000, 0x00000000 }; static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV - 0x0670ca00, 0x00000000, 0x00000000, 0x00000000 + 0x0ce0ca00, 0x00000000, 0x00000000, 0x00000000 }; static const operation noDest[] = @@ -416,6 +418,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const return false; if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD) return false; + if (op == OP_ADD3) + return false; return true; } -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev