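Save the first_wave bit from exec_hi into bit 31 of ttmp1 (s_save_pc_hi).
This allows the high bits of exec_lo/exec_hi (which hold a 48-bit address)
to be cleared in a follow-up patch. The flag is masked off again before
PC_HI is written to memory in L_SAVE_HWREG.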

Signed-off-by: Jay Cornwall <[email protected]>
Cc: Laurent Morichetti <[email protected]>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 596 +++++++++---------
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    |  14 +-
 2 files changed, 310 insertions(+), 300 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 9f435c777ba0..9c903c38dd74 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -665,7 +665,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 };
 
 static const uint32_t cwsr_trap_nv1x_hex[] = {
-       0xbf820001, 0xbf8201c6,
+       0xbf820001, 0xbf8201ca,
        0xb0804004, 0xb978f802,
        0x8a788678, 0xb96ef801,
        0x876eff6e, 0x00000800,
@@ -710,24 +710,25 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0xb9faf807, 0xbeee037e,
        0xbeef037f, 0xbefe0480,
        0xbf900004, 0xbf8e0002,
-       0xbf88fffe, 0xb97b02dc,
-       0x8f7b997b, 0x887b7b7f,
-       0xb97a2a05, 0x807a817a,
-       0xbf0d997b, 0xbf850002,
-       0x8f7a897a, 0xbf820001,
-       0x8f7a8a7a, 0x877bff7f,
-       0x0000ffff, 0x807aff7a,
-       0x00000200, 0x807a7e7a,
-       0x827b807b, 0xf4491c3d,
-       0xfa000050, 0xf4491d3d,
-       0xfa000060, 0xf4411e7d,
-       0xfa000074, 0xbef4037e,
-       0x8775ff7f, 0x0000ffff,
-       0x8875ff75, 0x00040000,
-       0xbef60380, 0xbef703ff,
-       0x10807fac, 0xbef1037c,
-       0xbef00380, 0xb97302dc,
-       0x8f739973, 0x8873737f,
+       0xbf88fffe, 0x877aff7f,
+       0x04000000, 0x8f7a857a,
+       0x886d7a6d, 0xb97b02dc,
+       0x8f7b997b, 0xb97a2a05,
+       0x807a817a, 0xbf0d997b,
+       0xbf850002, 0x8f7a897a,
+       0xbf820001, 0x8f7a8a7a,
+       0x877bff7f, 0x0000ffff,
+       0x807aff7a, 0x00000200,
+       0x807a7e7a, 0x827b807b,
+       0xf4491c3d, 0xfa000050,
+       0xf4491d3d, 0xfa000060,
+       0xf4411e7d, 0xfa000074,
+       0xbef4037e, 0x8775ff7f,
+       0x0000ffff, 0x8875ff75,
+       0x00040000, 0xbef60380,
+       0xbef703ff, 0x10807fac,
+       0xbef1037c, 0xbef00380,
+       0xb97302dc, 0x8f739973,
        0xb97bf816, 0xba80f816,
        0x00000000, 0xbefe03c1,
        0x907c9973, 0x877c817c,
@@ -757,8 +758,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0xbefc037e, 0xbefe037c,
        0xbefc0370, 0xf4611b3a,
        0xf8000000, 0x80708470,
-       0xbefc037e, 0xbefe037c,
-       0xbefc0370, 0xf4611b7a,
+       0xbefc037e, 0x8a7aff6d,
+       0x80000000, 0xbefe037c,
+       0xbefc0370, 0xf4611eba,
        0xf8000000, 0x80708470,
        0xbefc037e, 0xbefe037c,
        0xbefc0370, 0xf4611bba,
@@ -819,8 +821,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0xbeff0380, 0xbf820001,
        0xbeff03c1, 0xb97b4306,
        0x877bc17b, 0xbf840044,
-       0xbf8a0000, 0x877aff73,
-       0x04000000, 0xbf840040,
+       0xbf8a0000, 0x877aff6d,
+       0x80000000, 0xbf840040,
        0x8f7b867b, 0x8f7b827b,
        0xbef6037b, 0xb9702a05,
        0x80708170, 0xbf0d9973,
@@ -892,169 +894,168 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0xe0704000, 0x705d0000,
        0x807c817c, 0x8070ff70,
        0x00000080, 0xbf0a7b7c,
-       0xbf85fff8, 0xbf82013d,
+       0xbf85fff8, 0xbf82013c,
        0xbef4037e, 0x8775ff7f,
        0x0000ffff, 0x8875ff75,
        0x00040000, 0xbef60380,
        0xbef703ff, 0x10807fac,
        0xb97202dc, 0x8f729972,
-       0x8872727f, 0x876eff7f,
-       0x04000000, 0xbf840034,
+       0x876eff7f, 0x04000000,
+       0xbf840034, 0xbefe03c1,
+       0x907c9972, 0x877c817c,
+       0xbf06817c, 0xbf850002,
+       0xbeff0380, 0xbf820001,
+       0xbeff03c1, 0xb96f4306,
+       0x876fc16f, 0xbf840029,
+       0x8f6f866f, 0x8f6f826f,
+       0xbef6036f, 0xb9782a05,
+       0x80788178, 0xbf0d9972,
+       0xbf850002, 0x8f788978,
+       0xbf820001, 0x8f788a78,
+       0xb96e1e06, 0x8f6e8a6e,
+       0x80786e78, 0x8078ff78,
+       0x00000200, 0x8078ff78,
+       0x00000080, 0xbef603ff,
+       0x01000000, 0x907c9972,
+       0x877c817c, 0xbf06817c,
+       0xbefc0380, 0xbf850009,
+       0xe0310000, 0x781d0000,
+       0x807cff7c, 0x00000080,
+       0x8078ff78, 0x00000080,
+       0xbf0a6f7c, 0xbf85fff8,
+       0xbf820008, 0xe0310000,
+       0x781d0000, 0x807cff7c,
+       0x00000100, 0x8078ff78,
+       0x00000100, 0xbf0a6f7c,
+       0xbf85fff8, 0xbef80380,
        0xbefe03c1, 0x907c9972,
        0x877c817c, 0xbf06817c,
        0xbf850002, 0xbeff0380,
        0xbf820001, 0xbeff03c1,
-       0xb96f4306, 0x876fc16f,
-       0xbf840029, 0x8f6f866f,
-       0x8f6f826f, 0xbef6036f,
-       0xb9782a05, 0x80788178,
-       0xbf0d9972, 0xbf850002,
-       0x8f788978, 0xbf820001,
-       0x8f788a78, 0xb96e1e06,
-       0x8f6e8a6e, 0x80786e78,
-       0x8078ff78, 0x00000200,
-       0x8078ff78, 0x00000080,
-       0xbef603ff, 0x01000000,
-       0x907c9972, 0x877c817c,
-       0xbf06817c, 0xbefc0380,
-       0xbf850009, 0xe0310000,
-       0x781d0000, 0x807cff7c,
-       0x00000080, 0x8078ff78,
-       0x00000080, 0xbf0a6f7c,
-       0xbf85fff8, 0xbf820008,
-       0xe0310000, 0x781d0000,
-       0x807cff7c, 0x00000100,
-       0x8078ff78, 0x00000100,
-       0xbf0a6f7c, 0xbf85fff8,
-       0xbef80380, 0xbefe03c1,
-       0x907c9972, 0x877c817c,
-       0xbf06817c, 0xbf850002,
-       0xbeff0380, 0xbf820001,
-       0xbeff03c1, 0xb96f2a05,
-       0x806f816f, 0x8f6f826f,
-       0x907c9972, 0x877c817c,
-       0xbf06817c, 0xbf850021,
-       0xbef603ff, 0x01000000,
-       0xbeee0378, 0x8078ff78,
-       0x00000200, 0xbefc0384,
-       0xe0304000, 0x785d0000,
-       0xe0304080, 0x785d0100,
-       0xe0304100, 0x785d0200,
-       0xe0304180, 0x785d0300,
-       0xbf8c3f70, 0x7e008500,
-       0x7e028501, 0x7e048502,
-       0x7e068503, 0x807c847c,
-       0x8078ff78, 0x00000200,
-       0xbf0a6f7c, 0xbf85ffee,
-       0xe0304000, 0x6e5d0000,
-       0xe0304080, 0x6e5d0100,
-       0xe0304100, 0x6e5d0200,
-       0xe0304180, 0x6e5d0300,
-       0xbf820032, 0xbef603ff,
+       0xb96f2a05, 0x806f816f,
+       0x8f6f826f, 0x907c9972,
+       0x877c817c, 0xbf06817c,
+       0xbf850021, 0xbef603ff,
        0x01000000, 0xbeee0378,
-       0x8078ff78, 0x00000400,
+       0x8078ff78, 0x00000200,
        0xbefc0384, 0xe0304000,
-       0x785d0000, 0xe0304100,
-       0x785d0100, 0xe0304200,
-       0x785d0200, 0xe0304300,
+       0x785d0000, 0xe0304080,
+       0x785d0100, 0xe0304100,
+       0x785d0200, 0xe0304180,
        0x785d0300, 0xbf8c3f70,
        0x7e008500, 0x7e028501,
        0x7e048502, 0x7e068503,
        0x807c847c, 0x8078ff78,
-       0x00000400, 0xbf0a6f7c,
-       0xbf85ffee, 0xb96f1e06,
-       0x876fc16f, 0xbf84000e,
-       0x8f6f836f, 0x806f7c6f,
-       0xbefe03c1, 0xbeff0380,
+       0x00000200, 0xbf0a6f7c,
+       0xbf85ffee, 0xe0304000,
+       0x6e5d0000, 0xe0304080,
+       0x6e5d0100, 0xe0304100,
+       0x6e5d0200, 0xe0304180,
+       0x6e5d0300, 0xbf820032,
+       0xbef603ff, 0x01000000,
+       0xbeee0378, 0x8078ff78,
+       0x00000400, 0xbefc0384,
        0xe0304000, 0x785d0000,
+       0xe0304100, 0x785d0100,
+       0xe0304200, 0x785d0200,
+       0xe0304300, 0x785d0300,
        0xbf8c3f70, 0x7e008500,
-       0x807c817c, 0x8078ff78,
-       0x00000080, 0xbf0a6f7c,
-       0xbf85fff7, 0xbeff03c1,
-       0xe0304000, 0x6e5d0000,
-       0xe0304100, 0x6e5d0100,
-       0xe0304200, 0x6e5d0200,
-       0xe0304300, 0x6e5d0300,
-       0xbf8c3f70, 0xb9782a05,
-       0x80788178, 0xbf0d9972,
-       0xbf850002, 0x8f788978,
-       0xbf820001, 0x8f788a78,
-       0xb96e1e06, 0x8f6e8a6e,
-       0x80786e78, 0x8078ff78,
-       0x00000200, 0x80f8ff78,
-       0x00000050, 0xbef603ff,
-       0x01000000, 0xbefc03ff,
-       0x0000006c, 0x80f89078,
-       0xf429003a, 0xf0000000,
-       0xbf8cc07f, 0x80fc847c,
-       0xbf800000, 0xbe803100,
-       0xbe823102, 0x80f8a078,
-       0xf42d003a, 0xf0000000,
-       0xbf8cc07f, 0x80fc887c,
-       0xbf800000, 0xbe803100,
-       0xbe823102, 0xbe843104,
-       0xbe863106, 0x80f8c078,
-       0xf431003a, 0xf0000000,
-       0xbf8cc07f, 0x80fc907c,
-       0xbf800000, 0xbe803100,
-       0xbe823102, 0xbe843104,
-       0xbe863106, 0xbe883108,
-       0xbe8a310a, 0xbe8c310c,
-       0xbe8e310e, 0xbf06807c,
-       0xbf84fff0, 0xba80f801,
-       0x00000000, 0xbf8a0000,
+       0x7e028501, 0x7e048502,
+       0x7e068503, 0x807c847c,
+       0x8078ff78, 0x00000400,
+       0xbf0a6f7c, 0xbf85ffee,
+       0xb96f1e06, 0x876fc16f,
+       0xbf84000e, 0x8f6f836f,
+       0x806f7c6f, 0xbefe03c1,
+       0xbeff0380, 0xe0304000,
+       0x785d0000, 0xbf8c3f70,
+       0x7e008500, 0x807c817c,
+       0x8078ff78, 0x00000080,
+       0xbf0a6f7c, 0xbf85fff7,
+       0xbeff03c1, 0xe0304000,
+       0x6e5d0000, 0xe0304100,
+       0x6e5d0100, 0xe0304200,
+       0x6e5d0200, 0xe0304300,
+       0x6e5d0300, 0xbf8c3f70,
        0xb9782a05, 0x80788178,
        0xbf0d9972, 0xbf850002,
        0x8f788978, 0xbf820001,
        0x8f788a78, 0xb96e1e06,
        0x8f6e8a6e, 0x80786e78,
        0x8078ff78, 0x00000200,
+       0x80f8ff78, 0x00000050,
        0xbef603ff, 0x01000000,
-       0xf4211bfa, 0xf0000000,
-       0x80788478, 0xf4211b3a,
+       0xbefc03ff, 0x0000006c,
+       0x80f89078, 0xf429003a,
+       0xf0000000, 0xbf8cc07f,
+       0x80fc847c, 0xbf800000,
+       0xbe803100, 0xbe823102,
+       0x80f8a078, 0xf42d003a,
+       0xf0000000, 0xbf8cc07f,
+       0x80fc887c, 0xbf800000,
+       0xbe803100, 0xbe823102,
+       0xbe843104, 0xbe863106,
+       0x80f8c078, 0xf431003a,
+       0xf0000000, 0xbf8cc07f,
+       0x80fc907c, 0xbf800000,
+       0xbe803100, 0xbe823102,
+       0xbe843104, 0xbe863106,
+       0xbe883108, 0xbe8a310a,
+       0xbe8c310c, 0xbe8e310e,
+       0xbf06807c, 0xbf84fff0,
+       0xba80f801, 0x00000000,
+       0xbf8a0000, 0xb9782a05,
+       0x80788178, 0xbf0d9972,
+       0xbf850002, 0x8f788978,
+       0xbf820001, 0x8f788a78,
+       0xb96e1e06, 0x8f6e8a6e,
+       0x80786e78, 0x8078ff78,
+       0x00000200, 0xbef603ff,
+       0x01000000, 0xf4211bfa,
        0xf0000000, 0x80788478,
-       0xf4211b7a, 0xf0000000,
-       0x80788478, 0xf4211c3a,
+       0xf4211b3a, 0xf0000000,
+       0x80788478, 0xf4211b7a,
        0xf0000000, 0x80788478,
-       0xf4211c7a, 0xf0000000,
-       0x80788478, 0xf4211eba,
+       0xf4211c3a, 0xf0000000,
+       0x80788478, 0xf4211c7a,
        0xf0000000, 0x80788478,
-       0xf4211efa, 0xf0000000,
-       0x80788478, 0xf4211e7a,
+       0xf4211eba, 0xf0000000,
+       0x80788478, 0xf4211efa,
        0xf0000000, 0x80788478,
-       0xf4211cfa, 0xf0000000,
-       0x80788478, 0xf4211bba,
+       0xf4211e7a, 0xf0000000,
+       0x80788478, 0xf4211cfa,
        0xf0000000, 0x80788478,
-       0xbf8cc07f, 0xb9eef814,
        0xf4211bba, 0xf0000000,
        0x80788478, 0xbf8cc07f,
-       0xb9eef815, 0xbefc036f,
-       0xbefe0370, 0xbeff0371,
-       0x876f7bff, 0x000003ff,
-       0xb9ef4803, 0xb9f9f816,
-       0x876f7bff, 0xfffff800,
-       0x906f8b6f, 0xb9efa2c3,
-       0xb9f3f801, 0xb96e2a05,
-       0x806e816e, 0xbf0d9972,
-       0xbf850002, 0x8f6e896e,
-       0xbf820001, 0x8f6e8a6e,
-       0x806eff6e, 0x00000200,
-       0x806e746e, 0x826f8075,
-       0x876fff6f, 0x0000ffff,
-       0xf4091c37, 0xfa000050,
-       0xf4091d37, 0xfa000060,
-       0xf4011e77, 0xfa000074,
-       0xbf8cc07f, 0x906e8977,
-       0x876fff6e, 0x003f8000,
-       0x906e8677, 0x876eff6e,
-       0x02000000, 0x886e6f6e,
-       0xb9eef807, 0x876dff6d,
-       0x0000ffff, 0x87fe7e7e,
-       0x87ea6a6a, 0xb9faf802,
-       0xbe80226c, 0xbf810000,
+       0xb9eef814, 0xf4211bba,
+       0xf0000000, 0x80788478,
+       0xbf8cc07f, 0xb9eef815,
+       0xbefc036f, 0xbefe0370,
+       0xbeff0371, 0x876f7bff,
+       0x000003ff, 0xb9ef4803,
+       0xb9f9f816, 0x876f7bff,
+       0xfffff800, 0x906f8b6f,
+       0xb9efa2c3, 0xb9f3f801,
+       0xb96e2a05, 0x806e816e,
+       0xbf0d9972, 0xbf850002,
+       0x8f6e896e, 0xbf820001,
+       0x8f6e8a6e, 0x806eff6e,
+       0x00000200, 0x806e746e,
+       0x826f8075, 0x876fff6f,
+       0x0000ffff, 0xf4091c37,
+       0xfa000050, 0xf4091d37,
+       0xfa000060, 0xf4011e77,
+       0xfa000074, 0xbf8cc07f,
+       0x906e8977, 0x876fff6e,
+       0x003f8000, 0x906e8677,
+       0x876eff6e, 0x02000000,
+       0x886e6f6e, 0xb9eef807,
+       0x876dff6d, 0x0000ffff,
+       0x87fe7e7e, 0x87ea6a6a,
+       0xb9faf802, 0xbe80226c,
+       0xbf810000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
-       0xbf9f0000, 0x00000000,
 };
 
 static const uint32_t cwsr_trap_arcturus_hex[] = {
@@ -1533,7 +1534,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx10_hex[] = {
-       0xbf820001, 0xbf8201c7,
+       0xbf820001, 0xbf8201cb,
        0xb0804004, 0xb978f802,
        0x8a788678, 0xb96ef801,
        0x876eff6e, 0x00000800,
@@ -1560,21 +1561,22 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0xb9fa0283, 0xbeee037e,
        0xbeef037f, 0xbefe0480,
        0xbf900004, 0xbf8cc07f,
+       0x877aff7f, 0x04000000,
+       0x8f7a857a, 0x886d7a6d,
        0xb97b02dc, 0x8f7b997b,
-       0x887b7b7f, 0xb97a2a05,
-       0x807a817a, 0xbf0d997b,
-       0xbf850002, 0x8f7a897a,
-       0xbf820001, 0x8f7a8a7a,
-       0x877bff7f, 0x0000ffff,
-       0x807aff7a, 0x00000200,
-       0x807a7e7a, 0x827b807b,
-       0xbef4037e, 0x8775ff7f,
-       0x0000ffff, 0x8875ff75,
-       0x00040000, 0xbef60380,
-       0xbef703ff, 0x10807fac,
-       0xbef1037c, 0xbef00380,
-       0xb97302dc, 0x8f739973,
-       0x8873737f, 0xbefe03c1,
+       0xb97a2a05, 0x807a817a,
+       0xbf0d997b, 0xbf850002,
+       0x8f7a897a, 0xbf820001,
+       0x8f7a8a7a, 0x877bff7f,
+       0x0000ffff, 0x807aff7a,
+       0x00000200, 0x807a7e7a,
+       0x827b807b, 0xbef4037e,
+       0x8775ff7f, 0x0000ffff,
+       0x8875ff75, 0x00040000,
+       0xbef60380, 0xbef703ff,
+       0x10807fac, 0xbef1037c,
+       0xbef00380, 0xb97302dc,
+       0x8f739973, 0xbefe03c1,
        0x907c9973, 0x877c817c,
        0xbf06817c, 0xbf850002,
        0xbeff0380, 0xbf820002,
@@ -1601,8 +1603,9 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0xbefc0380, 0xd7610002,
        0x0000f871, 0x807c817c,
        0xd7610002, 0x0000f86c,
-       0x807c817c, 0xd7610002,
-       0x0000f86d, 0x807c817c,
+       0x807c817c, 0x8a7aff6d,
+       0x80000000, 0xd7610002,
+       0x0000f87a, 0x807c817c,
        0xd7610002, 0x0000f86e,
        0x807c817c, 0xd7610002,
        0x0000f86f, 0x807c817c,
@@ -1688,7 +1691,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0xbf820001, 0xbeff03c1,
        0xb97b4306, 0x877bc17b,
        0xbf840044, 0xbf8a0000,
-       0x877aff73, 0x04000000,
+       0x877aff6d, 0x80000000,
        0xbf840040, 0x8f7b867b,
        0x8f7b827b, 0xbef6037b,
        0xb9702a05, 0x80708170,
@@ -1761,162 +1764,161 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
        0x705d0000, 0x807c817c,
        0x8070ff70, 0x00000080,
        0xbf0a7b7c, 0xbf85fff8,
-       0xbf820134, 0xbef4037e,
+       0xbf820133, 0xbef4037e,
        0x8775ff7f, 0x0000ffff,
        0x8875ff75, 0x00040000,
        0xbef60380, 0xbef703ff,
        0x10807fac, 0xb97202dc,
-       0x8f729972, 0x8872727f,
-       0x876eff7f, 0x04000000,
-       0xbf840034, 0xbefe03c1,
-       0x907c9972, 0x877c817c,
-       0xbf06817c, 0xbf850002,
-       0xbeff0380, 0xbf820001,
-       0xbeff03c1, 0xb96f4306,
-       0x876fc16f, 0xbf840029,
-       0x8f6f866f, 0x8f6f826f,
-       0xbef6036f, 0xb9782a05,
-       0x80788178, 0xbf0d9972,
-       0xbf850002, 0x8f788978,
-       0xbf820001, 0x8f788a78,
-       0xb96e1e06, 0x8f6e8a6e,
-       0x80786e78, 0x8078ff78,
-       0x00000200, 0x8078ff78,
-       0x00000080, 0xbef603ff,
-       0x01000000, 0x907c9972,
-       0x877c817c, 0xbf06817c,
-       0xbefc0380, 0xbf850009,
-       0xe0310000, 0x781d0000,
-       0x807cff7c, 0x00000080,
-       0x8078ff78, 0x00000080,
-       0xbf0a6f7c, 0xbf85fff8,
-       0xbf820008, 0xe0310000,
-       0x781d0000, 0x807cff7c,
-       0x00000100, 0x8078ff78,
-       0x00000100, 0xbf0a6f7c,
-       0xbf85fff8, 0xbef80380,
+       0x8f729972, 0x876eff7f,
+       0x04000000, 0xbf840034,
        0xbefe03c1, 0x907c9972,
        0x877c817c, 0xbf06817c,
        0xbf850002, 0xbeff0380,
        0xbf820001, 0xbeff03c1,
-       0xb96f2a05, 0x806f816f,
-       0x8f6f826f, 0x907c9972,
-       0x877c817c, 0xbf06817c,
-       0xbf850021, 0xbef603ff,
-       0x01000000, 0xbeee0378,
+       0xb96f4306, 0x876fc16f,
+       0xbf840029, 0x8f6f866f,
+       0x8f6f826f, 0xbef6036f,
+       0xb9782a05, 0x80788178,
+       0xbf0d9972, 0xbf850002,
+       0x8f788978, 0xbf820001,
+       0x8f788a78, 0xb96e1e06,
+       0x8f6e8a6e, 0x80786e78,
        0x8078ff78, 0x00000200,
-       0xbefc0384, 0xe0304000,
-       0x785d0000, 0xe0304080,
-       0x785d0100, 0xe0304100,
-       0x785d0200, 0xe0304180,
-       0x785d0300, 0xbf8c3f70,
-       0x7e008500, 0x7e028501,
-       0x7e048502, 0x7e068503,
-       0x807c847c, 0x8078ff78,
-       0x00000200, 0xbf0a6f7c,
-       0xbf85ffee, 0xe0304000,
-       0x6e5d0000, 0xe0304080,
-       0x6e5d0100, 0xe0304100,
-       0x6e5d0200, 0xe0304180,
-       0x6e5d0300, 0xbf820032,
+       0x8078ff78, 0x00000080,
+       0xbef603ff, 0x01000000,
+       0x907c9972, 0x877c817c,
+       0xbf06817c, 0xbefc0380,
+       0xbf850009, 0xe0310000,
+       0x781d0000, 0x807cff7c,
+       0x00000080, 0x8078ff78,
+       0x00000080, 0xbf0a6f7c,
+       0xbf85fff8, 0xbf820008,
+       0xe0310000, 0x781d0000,
+       0x807cff7c, 0x00000100,
+       0x8078ff78, 0x00000100,
+       0xbf0a6f7c, 0xbf85fff8,
+       0xbef80380, 0xbefe03c1,
+       0x907c9972, 0x877c817c,
+       0xbf06817c, 0xbf850002,
+       0xbeff0380, 0xbf820001,
+       0xbeff03c1, 0xb96f2a05,
+       0x806f816f, 0x8f6f826f,
+       0x907c9972, 0x877c817c,
+       0xbf06817c, 0xbf850021,
        0xbef603ff, 0x01000000,
        0xbeee0378, 0x8078ff78,
-       0x00000400, 0xbefc0384,
+       0x00000200, 0xbefc0384,
        0xe0304000, 0x785d0000,
-       0xe0304100, 0x785d0100,
-       0xe0304200, 0x785d0200,
-       0xe0304300, 0x785d0300,
+       0xe0304080, 0x785d0100,
+       0xe0304100, 0x785d0200,
+       0xe0304180, 0x785d0300,
        0xbf8c3f70, 0x7e008500,
        0x7e028501, 0x7e048502,
        0x7e068503, 0x807c847c,
-       0x8078ff78, 0x00000400,
+       0x8078ff78, 0x00000200,
        0xbf0a6f7c, 0xbf85ffee,
-       0xb96f1e06, 0x876fc16f,
-       0xbf84000e, 0x8f6f836f,
-       0x806f7c6f, 0xbefe03c1,
-       0xbeff0380, 0xe0304000,
-       0x785d0000, 0xbf8c3f70,
-       0x7e008500, 0x807c817c,
-       0x8078ff78, 0x00000080,
-       0xbf0a6f7c, 0xbf85fff7,
-       0xbeff03c1, 0xe0304000,
-       0x6e5d0000, 0xe0304100,
-       0x6e5d0100, 0xe0304200,
-       0x6e5d0200, 0xe0304300,
-       0x6e5d0300, 0xbf8c3f70,
+       0xe0304000, 0x6e5d0000,
+       0xe0304080, 0x6e5d0100,
+       0xe0304100, 0x6e5d0200,
+       0xe0304180, 0x6e5d0300,
+       0xbf820032, 0xbef603ff,
+       0x01000000, 0xbeee0378,
+       0x8078ff78, 0x00000400,
+       0xbefc0384, 0xe0304000,
+       0x785d0000, 0xe0304100,
+       0x785d0100, 0xe0304200,
+       0x785d0200, 0xe0304300,
+       0x785d0300, 0xbf8c3f70,
+       0x7e008500, 0x7e028501,
+       0x7e048502, 0x7e068503,
+       0x807c847c, 0x8078ff78,
+       0x00000400, 0xbf0a6f7c,
+       0xbf85ffee, 0xb96f1e06,
+       0x876fc16f, 0xbf84000e,
+       0x8f6f836f, 0x806f7c6f,
+       0xbefe03c1, 0xbeff0380,
+       0xe0304000, 0x785d0000,
+       0xbf8c3f70, 0x7e008500,
+       0x807c817c, 0x8078ff78,
+       0x00000080, 0xbf0a6f7c,
+       0xbf85fff7, 0xbeff03c1,
+       0xe0304000, 0x6e5d0000,
+       0xe0304100, 0x6e5d0100,
+       0xe0304200, 0x6e5d0200,
+       0xe0304300, 0x6e5d0300,
+       0xbf8c3f70, 0xb9782a05,
+       0x80788178, 0xbf0d9972,
+       0xbf850002, 0x8f788978,
+       0xbf820001, 0x8f788a78,
+       0xb96e1e06, 0x8f6e8a6e,
+       0x80786e78, 0x8078ff78,
+       0x00000200, 0x80f8ff78,
+       0x00000050, 0xbef603ff,
+       0x01000000, 0xbefc03ff,
+       0x0000006c, 0x80f89078,
+       0xf429003a, 0xf0000000,
+       0xbf8cc07f, 0x80fc847c,
+       0xbf800000, 0xbe803100,
+       0xbe823102, 0x80f8a078,
+       0xf42d003a, 0xf0000000,
+       0xbf8cc07f, 0x80fc887c,
+       0xbf800000, 0xbe803100,
+       0xbe823102, 0xbe843104,
+       0xbe863106, 0x80f8c078,
+       0xf431003a, 0xf0000000,
+       0xbf8cc07f, 0x80fc907c,
+       0xbf800000, 0xbe803100,
+       0xbe823102, 0xbe843104,
+       0xbe863106, 0xbe883108,
+       0xbe8a310a, 0xbe8c310c,
+       0xbe8e310e, 0xbf06807c,
+       0xbf84fff0, 0xba80f801,
+       0x00000000, 0xbf8a0000,
        0xb9782a05, 0x80788178,
        0xbf0d9972, 0xbf850002,
        0x8f788978, 0xbf820001,
        0x8f788a78, 0xb96e1e06,
        0x8f6e8a6e, 0x80786e78,
        0x8078ff78, 0x00000200,
-       0x80f8ff78, 0x00000050,
        0xbef603ff, 0x01000000,
-       0xbefc03ff, 0x0000006c,
-       0x80f89078, 0xf429003a,
-       0xf0000000, 0xbf8cc07f,
-       0x80fc847c, 0xbf800000,
-       0xbe803100, 0xbe823102,
-       0x80f8a078, 0xf42d003a,
-       0xf0000000, 0xbf8cc07f,
-       0x80fc887c, 0xbf800000,
-       0xbe803100, 0xbe823102,
-       0xbe843104, 0xbe863106,
-       0x80f8c078, 0xf431003a,
-       0xf0000000, 0xbf8cc07f,
-       0x80fc907c, 0xbf800000,
-       0xbe803100, 0xbe823102,
-       0xbe843104, 0xbe863106,
-       0xbe883108, 0xbe8a310a,
-       0xbe8c310c, 0xbe8e310e,
-       0xbf06807c, 0xbf84fff0,
-       0xba80f801, 0x00000000,
-       0xbf8a0000, 0xb9782a05,
-       0x80788178, 0xbf0d9972,
-       0xbf850002, 0x8f788978,
-       0xbf820001, 0x8f788a78,
-       0xb96e1e06, 0x8f6e8a6e,
-       0x80786e78, 0x8078ff78,
-       0x00000200, 0xbef603ff,
-       0x01000000, 0xf4211bfa,
+       0xf4211bfa, 0xf0000000,
+       0x80788478, 0xf4211b3a,
        0xf0000000, 0x80788478,
-       0xf4211b3a, 0xf0000000,
-       0x80788478, 0xf4211b7a,
+       0xf4211b7a, 0xf0000000,
+       0x80788478, 0xf4211c3a,
        0xf0000000, 0x80788478,
-       0xf4211c3a, 0xf0000000,
-       0x80788478, 0xf4211c7a,
+       0xf4211c7a, 0xf0000000,
+       0x80788478, 0xf4211eba,
        0xf0000000, 0x80788478,
-       0xf4211eba, 0xf0000000,
-       0x80788478, 0xf4211efa,
+       0xf4211efa, 0xf0000000,
+       0x80788478, 0xf4211e7a,
        0xf0000000, 0x80788478,
-       0xf4211e7a, 0xf0000000,
-       0x80788478, 0xf4211cfa,
+       0xf4211cfa, 0xf0000000,
+       0x80788478, 0xf4211bba,
        0xf0000000, 0x80788478,
+       0xbf8cc07f, 0xb9eef814,
        0xf4211bba, 0xf0000000,
        0x80788478, 0xbf8cc07f,
-       0xb9eef814, 0xf4211bba,
-       0xf0000000, 0x80788478,
-       0xbf8cc07f, 0xb9eef815,
-       0xbefc036f, 0xbefe0370,
-       0xbeff0371, 0x876f7bff,
-       0x000003ff, 0xb9ef4803,
-       0x876f7bff, 0xfffff800,
-       0x906f8b6f, 0xb9efa2c3,
-       0xb9f3f801, 0xb96e2a05,
-       0x806e816e, 0xbf0d9972,
-       0xbf850002, 0x8f6e896e,
-       0xbf820001, 0x8f6e8a6e,
-       0x806eff6e, 0x00000200,
-       0x806e746e, 0x826f8075,
-       0x876fff6f, 0x0000ffff,
-       0xf4091c37, 0xfa000050,
-       0xf4091d37, 0xfa000060,
-       0xf4011e77, 0xfa000074,
-       0xbf8cc07f, 0x876dff6d,
-       0x0000ffff, 0x87fe7e7e,
-       0x87ea6a6a, 0xb9faf802,
-       0xbe80226c, 0xbf810000,
+       0xb9eef815, 0xbefc036f,
+       0xbefe0370, 0xbeff0371,
+       0x876f7bff, 0x000003ff,
+       0xb9ef4803, 0x876f7bff,
+       0xfffff800, 0x906f8b6f,
+       0xb9efa2c3, 0xb9f3f801,
+       0xb96e2a05, 0x806e816e,
+       0xbf0d9972, 0xbf850002,
+       0x8f6e896e, 0xbf820001,
+       0x8f6e8a6e, 0x806eff6e,
+       0x00000200, 0x806e746e,
+       0x826f8075, 0x876fff6f,
+       0x0000ffff, 0xf4091c37,
+       0xfa000050, 0xf4091d37,
+       0xfa000060, 0xf4011e77,
+       0xfa000074, 0xbf8cc07f,
+       0x876dff6d, 0x0000ffff,
+       0x87fe7e7e, 0x87ea6a6a,
+       0xb9faf802, 0xbe80226c,
+       0xbf810000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
-       0xbf9f0000, 0x00000000,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 69721aea57b6..06947a8767c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -83,6 +83,9 @@ var S_SAVE_BUF_RSRC_WORD3_MISC                        = 0x10807FAC
 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK            = 0x04000000
 var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT           = 26
 
+var S_SAVE_PC_HI_FIRST_WAVE_MASK               = 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT              = 31
+
 var s_sgpr_save_num                            = 108
 
 var s_save_spi_init_lo                         = exec_lo
@@ -245,6 +248,11 @@ L_SLEEP:
        s_waitcnt       lgkmcnt(0)
 #endif
 
+       // Save first_wave flag so we can clear high bits of save address.
+       s_and_b32       s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+       s_lshl_b32      s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+       s_or_b32        s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
        // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
        // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
        get_wave_size(s_save_ttmps_hi)
@@ -331,7 +339,8 @@ L_SAVE_HWREG:
 
        write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
        write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
-       write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+       s_andn2_b32     s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+       write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
        write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
        write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
        write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
@@ -440,7 +449,7 @@ L_SAVE_LDS_NORMAL:
        s_cbranch_scc0  L_SAVE_LDS_DONE                                         //no lds used? jump to L_SAVE_DONE
 
        s_barrier                                                               //LDS is used? wait for other waves in the same TG
-       s_and_b32       s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+       s_and_b32       s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
        s_cbranch_scc0  L_SAVE_LDS_DONE
 
        // first wave do LDS save;
@@ -1002,7 +1011,6 @@ end
 function get_wave_size(s_reg)
        s_getreg_b32    s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
        s_lshl_b32      s_reg, s_reg, S_WAVE_SIZE
-       s_or_b32        s_reg, s_save_spi_init_hi, s_reg                        //share with exec_hi, it's at bit25
 end
 
 function save_and_clear_ib_sts(tmp1, tmp2)
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to