On 06/10/2017 09:14 AM, Aaryaman Vasishta wrote:
> See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and exasanv110.fp. Any ideas on what could be causing the first fmul to require $r0 and/or $r1?

'tex nodep $r4 $r2 0x0 0x1 t2d 0xf'

is actually:

'tex nodep $r4:$r7 $r2 0x0 0x1 t2d 0xf'

Very confusing, I know.


> Cheers,
> Aaryaman

On Sat, Jun 10, 2017 at 4:10 PM, Aaryaman Vasishta <[email protected] <mailto:[email protected]>> wrote:

    This patch adds proper delays to maxwell exa shaders. rendercheck tests
    seem consistent with/without this patch. I haven't extensively tested
    them though.

    Trello:
    https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays

    Signed-off-by: Aaryaman Vasishta <[email protected]>
    ---
      src/shader/exac8nv110.fp  | 10 +++++-----
      src/shader/exac8nv110.fpc | 18 +++++++++---------
      src/shader/exacanv110.fp  | 10 +++++-----
      src/shader/exacanv110.fpc | 18 +++++++++---------
      src/shader/exacmnv110.fp  | 10 +++++-----
      src/shader/exacmnv110.fpc | 18 +++++++++---------
      src/shader/exas8nv110.fp  |  6 +++---
      src/shader/exas8nv110.fpc | 12 ++++++------
      src/shader/exasanv110.fp  | 10 +++++-----
      src/shader/exasanv110.fpc | 18 +++++++++---------
      src/shader/exascnv110.fp  |  6 +++---
      src/shader/exascnv110.fpc | 10 +++++-----
      src/shader/videonv110.fp  | 14 +++++++-------
      src/shader/videonv110.fpc | 26 +++++++++++++-------------
      14 files changed, 93 insertions(+), 93 deletions(-)

    diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
    index ce78036..101b67f 100644
    --- a/src/shader/exac8nv110.fp
    +++ b/src/shader/exac8nv110.fp
    @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
      };
      #else

    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
      ipa pass $r0 a[0x7c] 0x0 0x0 0x1
      mufu rcp $r0 $r0
      ipa $r3 a[0x94] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
      ipa $r2 a[0x90] $r0 0x0 0x1
      tex nodep $r1 $r2 0x0 0x1 t2d 0x8
      ipa $r3 a[0x84] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
      ipa $r2 a[0x80] $r0 0x0 0x1
      tex nodep $r0 $r2 0x0 0x0 t2d 0x8
      depbar le 0x5 0x0 0x0
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
      fmul ftz $r3 $r0 $r1
      mov $r2 $r3 0xf
      mov $r1 $r3 0xf
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1) (st 0xf) (st 0x0)
      mov $r0 $r3 0xf
      exit
      #endif
    diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
    index 4aa1368..1f7d649 100644
    --- a/src/shader/exac8nv110.fpc
    +++ b/src/shader/exac8nv110.fpc
    @@ -1,36 +1,36 @@
    -0xfc0007e0,
    -0x001f8000,
    +0xe1a0070f,
    +0x003c3c01,
      0xcff7ff00,
      0xe003ff87,
      0x00470000,
      0x50800000,
      0x4007ff03,
      0xe043ff89,
    -0xfc0007e0,
    -0x001f8000,
    +0x21e0072f,
    +0x005cbc03,
      0x0007ff02,
      0xe043ff89,
      0x2ff70201,
      0xc03a0014,
      0x4007ff03,
      0xe043ff88,
    -0xfc0007e0,
    -0x001f8000,
    +0xe5e0074f,
    +0x001fbc06,
      0x0007ff02,
      0xe043ff88,
      0x2ff70200,
      0xc03a0004,
      0x34070000,
      0xf0f00000,
    -0xfc0007e0,
    -0x001f8000,
    +0xfc201fe6,
    +0x001f8400,
      0x00170003,
      0x5c681000,
      0x00370002,
      0x5c980780,
      0x00370001,
      0x5c980780,
    -0xfc0007e0,
    +0xfde007e1,
      0x001f8000,
      0x00370000,
      0x5c980780,
    diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
    index a70d5c5..8a9bd43 100644
    --- a/src/shader/exacanv110.fp
    +++ b/src/shader/exacanv110.fp
    @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
      };
      #else

    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
      ipa pass $r0 a[0x7c] 0x0 0x0 0x1
      mufu rcp $r0 $r0
      ipa $r3 a[0x94] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x2)
      ipa $r2 a[0x90] $r0 0x0 0x1
      tex nodep $r4 $r2 0x0 0x1 t2d 0xf
      ipa $r1 a[0x84] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x2 wt 0x4) (st 0xf)
      ipa $r0 a[0x80] $r0 0x0 0x1
      tex nodep $r0 $r0 0x0 0x0 t2d 0xf
      depbar le 0x5 0x0 0x0
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1 wt 0x4) (st 0x1) (st 0x1)
      fmul ftz $r3 $r3 $r7
      fmul ftz $r2 $r2 $r6
      fmul ftz $r1 $r1 $r5
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1 wt 0x1) (st 0xf) (st 0x0)
      fmul ftz $r0 $r0 $r4
      exit
      #endif
    diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
    index 7c0ca5e..08a633c 100644
    --- a/src/shader/exacanv110.fpc
    +++ b/src/shader/exacanv110.fpc
    @@ -1,36 +1,36 @@
    -0xfc0007e0,
    -0x001f8000,
    +0xe1a0070f,
    +0x003c3c01,
      0xcff7ff00,
      0xe003ff87,
      0x00470000,
      0x50800000,
      0x4007ff03,
      0xe043ff89,
    -0xfc0007e0,
    -0x001f8000,
    +0x21e0072f,
    +0x001d3c03,
      0x0007ff02,
      0xe043ff89,
      0xaff70204,
      0xc03a0017,
      0x4007ff01,
      0xe043ff88,
    -0xfc0007e0,
    -0x001f8000,
    +0xe9e0274f,
    +0x001fbc04,
      0x0007ff00,
      0xe043ff88,
      0xaff70000,
      0xc03a0007,
      0x34070000,
      0xf0f00000,
    -0xfc0007e0,
    -0x001f8000,
    +0xfc2027e1,
    +0x001f8400,
      0x00770303,
      0x5c681000,
      0x00670202,
      0x5c681000,
      0x00570101,
      0x5c681000,
    -0xfc0007e0,
    +0xfde00fe1,
      0x001f8000,
      0x00470000,
      0x5c681000,
    diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
    index fe5c294..39c49de 100644
    --- a/src/shader/exacmnv110.fp
    +++ b/src/shader/exacmnv110.fp
    @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
      };
      #else

    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
      ipa pass $r0 a[0x7c] 0x0 0x0 0x1
      mufu rcp $r0 $r0
      ipa $r3 a[0x94] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
      ipa $r2 a[0x90] $r0 0x0 0x1
      tex nodep $r4 $r2 0x0 0x1 t2d 0x8
      ipa $r1 a[0x84] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
      ipa $r0 a[0x80] $r0 0x0 0x1
      tex nodep $r0 $r0 0x0 0x0 t2d 0xf
      depbar le 0x5 0x0 0x0
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
      fmul ftz $r3 $r3 $r4
      fmul ftz $r2 $r2 $r4
      fmul ftz $r1 $r1 $r4
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1) (st 0xf) (st 0x0)
      fmul ftz $r0 $r0 $r4
      exit
      #endif
    diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
    index 9d62c1a..f5f06e2 100644
    --- a/src/shader/exacmnv110.fpc
    +++ b/src/shader/exacmnv110.fpc
    @@ -1,36 +1,36 @@
    -0xfc0007e0,
    -0x001f8000,
    +0xe1a0070f,
    +0x003c3c01,
      0xcff7ff00,
      0xe003ff87,
      0x00470000,
      0x50800000,
      0x4007ff03,
      0xe043ff89,
    -0xfc0007e0,
    -0x001f8000,
    +0xe1e0072f,
    +0x0008bc03,
      0x0007ff02,
      0xe043ff89,
      0x2ff70204,
      0xc03a0014,
      0x4007ff01,
      0xe043ff88,
    -0xfc0007e0,
    -0x001f8000,
    +0xe5e0274f,
    +0x001fbc06,
      0x0007ff00,
      0xe043ff88,
      0xaff70000,
      0xc03a0007,
      0x34070000,
      0xf0f00000,
    -0xfc0007e0,
    -0x001f8000,
    +0xfc201fe1,
    +0x001f8400,
      0x00470303,
      0x5c681000,
      0x00470202,
      0x5c681000,
      0x00470101,
      0x5c681000,
    -0xfc0007e0,
    +0xfde007e1,
      0x001f8000,
      0x00470000,
      0x5c681000,
    diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
    index 4fe2e19..a555beb 100644
    --- a/src/shader/exas8nv110.fp
    +++ b/src/shader/exas8nv110.fp
    @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
      };
      #else

    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
      ipa pass $r0 a[0x7c] 0x0 0x0 0x1
      mufu rcp $r0 $r0
      ipa $r1 a[0x84] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
      ipa $r0 a[0x80] $r0 0x0 0x1
      tex nodep $r0 $r0 0x0 0x0 t2d 0x8
      depbar le 0x5 0x0 0x0
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
      mov $r3 $r0 0xf
      mov $r2 $r0 0xf
      mov $r1 $r0 0xf
    diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
    index 1181c41..e58d168 100644
    --- a/src/shader/exas8nv110.fpc
    +++ b/src/shader/exas8nv110.fpc
    @@ -1,21 +1,21 @@
    -0xfc0007e0,
    -0x001f8000,
    +0xe1a0070f,
    +0x003c3c01,
      0xcff7ff00,
      0xe003ff87,
      0x00470000,
      0x50800000,
      0x4007ff01,
      0xe043ff88,
    -0xfc0007e0,
    -0x001f8000,
    +0xe1e0072f,
    +0x001fbc03,
      0x0007ff00,
      0xe043ff88,
      0x2ff70000,
      0xc03a0004,
      0x34070000,
      0xf0f00000,
    -0xfc0007e0,
    -0x001f8000,
    +0xfc200fe1,
    +0x001f8400,
      0x00070003,
      0x5c980780,
      0x00070002,
    diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
    index 61374a6..9f8742a 100644
    --- a/src/shader/exasanv110.fp
    +++ b/src/shader/exasanv110.fp
    @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
      };
      #else

    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
      ipa pass $r0 a[0x7c] 0x0 0x0 0x1
      mufu rcp $r0 $r0
      ipa $r3 a[0x84] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
      ipa $r2 a[0x80] $r0 0x0 0x1
      tex nodep $r4 $r2 0x0 0x0 t2d 0x8
      ipa $r1 a[0x94] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
      ipa $r0 a[0x90] $r0 0x0 0x1
      tex nodep $r0 $r0 0x0 0x1 t2d 0xf
      depbar le 0x5 0x0 0x0
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
      fmul ftz $r3 $r3 $r4
      fmul ftz $r2 $r2 $r4
      fmul ftz $r1 $r1 $r4
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1) (st 0xf) (st 0x0)
      fmul ftz $r0 $r0 $r4
      exit
      #endif
    diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
    index 5516a03..c291298 100644
    --- a/src/shader/exasanv110.fpc
    +++ b/src/shader/exasanv110.fpc
    @@ -1,36 +1,36 @@
    -0xfc0007e0,
    -0x001f8000,
    +0xe1a0070f,
    +0x003c3c01,
      0xcff7ff00,
      0xe003ff87,
      0x00470000,
      0x50800000,
      0x4007ff03,
      0xe043ff88,
    -0xfc0007e0,
    -0x001f8000,
    +0xe1e0072f,
    +0x0008bc03,
      0x0007ff02,
      0xe043ff88,
      0x2ff70204,
      0xc03a0004,
      0x4007ff01,
      0xe043ff89,
    -0xfc0007e0,
    -0x001f8000,
    +0xe5e0274f,
    +0x001fbc06,
      0x0007ff00,
      0xe043ff89,
      0xaff70000,
      0xc03a0017,
      0x34070000,
      0xf0f00000,
    -0xfc0007e0,
    -0x001f8000,
    +0xfc201fe1,
    +0x001f8400,
      0x00470303,
      0x5c681000,
      0x00470202,
      0x5c681000,
      0x00470101,
      0x5c681000,
    -0xfc0007e0,
    +0xfde007e1,
      0x001f8000,
      0x00470000,
      0x5c681000,
    diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
    index 90bbb55..86e14e8 100644
    --- a/src/shader/exascnv110.fp
    +++ b/src/shader/exascnv110.fp
    @@ -25,14 +25,14 @@ NV110FP_Source[] = {
      };
      #else

    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
      ipa pass $r0 a[0x7c] 0x0 0x0 0x1
      mufu rcp $r0 $r0
      ipa $r1 a[0x84] $r0 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
      ipa $r0 a[0x80] $r0 0x0 0x1
      tex nodep $r0 $r0 0x0 0x0 t2d 0xf
      depbar le 0x5 0x0 0x0
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf) (st 0x0) (st 0x0)
      exit
      #endif
    diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
    index 2dba15d..1fef5d2 100644
    --- a/src/shader/exascnv110.fpc
    +++ b/src/shader/exascnv110.fpc
    @@ -1,20 +1,20 @@
    -0xfc0007e0,
    -0x001f8000,
    +0xe1a0070f,
    +0x003c3c01,
      0xcff7ff00,
      0xe003ff87,
      0x00470000,
      0x50800000,
      0x4007ff01,
      0xe043ff88,
    -0xfc0007e0,
    -0x001f8000,
    +0xfde0072f,
    +0x001fbc03,
      0x0007ff00,
      0xe043ff88,
      0xaff70000,
      0xc03a0007,
      0x34070000,
      0xf0f00000,
    -0xfc0007e0,
    +0xfc0007ef,
      0x001f8000,
      0x0007000f,
      0xe3000000,
    diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
    index 2728311..dd3816c 100644
    --- a/src/shader/videonv110.fp
    +++ b/src/shader/videonv110.fp
    @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
      };
      #else

    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
      ipa pass $r2 a[0x7c] 0x0 0x0 0x1
      mufu rcp $r2 $r2
      ipa $r0 a[0x80] $r2 0x0 0x1
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
      ipa $r1 a[0x84] $r2 0x0 0x1
      tex nodep $r4 $r0 0x0 0x0 t2d 0x8
      tex nodep $r0 $r0 0x0 0x1 t2d 0xc
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf) (st 0x6 wt 0x1) (st 0x6)
      depbar le 0x5 0x1 0x1
      fmul ftz $r5 $r4 c0[0x0]
      fadd ftz $r3 $r5 c0[0x4]
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x6) (st 0x6) (st 0xf)
      fadd ftz $r4 $r5 c0[0x8]
      fadd ftz $r5 $r5 c0[0xc]
      depbar le 0x5 0x0 0x0
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x6 wt 0x2) (st 0x1) (st 0x1)
      ffma ftz $r3 $r0 c0[0x10] $r3
      ffma ftz $r4 $r0 c0[0x14] $r4
      ffma ftz $r5 $r0 c0[0x18] $r5
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0x1) (st 0x1) (st 0x6)
      ffma ftz $r0 $r1 c0[0x1c] $r3
      ffma ftz $r2 $r1 c0[0x24] $r5
      ffma ftz $r1 $r1 c0[0x20] $r4
    -sched (st 0x0) (st 0x0) (st 0x0)
    +sched (st 0xf) (st 0x0) (st 0x0)
      exit
      #endif
    diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
    index 31d745a..8fbc246 100644
    --- a/src/shader/videonv110.fpc
    +++ b/src/shader/videonv110.fpc
    @@ -1,52 +1,52 @@
    -0xfc0007e0,
    -0x001f8000,
    +0xe1a0070f,
    +0x003c3c01,
      0xcff7ff02,
      0xe003ff87,
      0x00470202,
      0x50800000,
      0x0027ff00,
      0xe043ff88,
    -0xfc0007e0,
    -0x001f8000,
    +0xe1e0072f,
    +0x001cbc03,
      0x4027ff01,
      0xe043ff88,
      0x2ff70004,
      0xc03a0004,
      0x2ff70000,
      0xc03a0016,
    -0xfc0007e0,
    -0x001f8000,
    +0xfcc007ef,
    +0x001f9801,
      0x34170001,
      0xf0f00000,
      0x00070405,
      0x4c681000,
      0x00170503,
      0x4c581000,
    -0xfc0007e0,
    -0x001f8000,
    +0xfcc007e6,
    +0x001fbc00,
      0x00270504,
      0x4c581000,
      0x00370505,
      0x4c581000,
      0x34070000,
      0xf0f00000,
    -0xfc0007e0,
    -0x001f8000,
    +0xfc2017e6,
    +0x001f8400,
      0x00470003,
      0x49a00180,
      0x00570004,
      0x49a00200,
      0x00670005,
      0x49a00280,
    -0xfc0007e0,
    -0x001f8000,
    +0xfc2007e1,
    +0x001f9800,
      0x00770100,
      0x49a00180,
      0x00970102,
      0x49a00280,
      0x00870101,
      0x49a00200,
    -0xfc0007e0,
    +0xfc0007ef,
      0x001f8000,
      0x0007000f,
      0xe3000000,
    --
    2.11.0


_______________________________________________
Nouveau mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/nouveau

Reply via email to