Anthony Gutierrez has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/29926 )

Change subject: arch-gcn3: Updating implementation of atomics
......................................................................

arch-gcn3: Updating implementation of atomics

This changeset moves the read of the data operand(s) from
initiateAcc to the execute method of the atomic instructions.

Change-Id: I1debae302f0b13f79ed2b7a9ed2f6b07fcec5128
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29926
Reviewed-by: Anthony Gutierrez <anthony.gutier...@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutier...@amd.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/arch/gcn3/insts/instructions.cc
1 file changed, 45 insertions(+), 52 deletions(-)

Approvals:
  Anthony Gutierrez: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 26af241..32719ad 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -39261,11 +39261,24 @@
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
+        ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);

         addr.read();
+        data.read();
+        cmp.read();

         calcAddr(gpuDynInst, addr);

+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
+                    = data[lane];
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = cmp[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
             gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
             /**
@@ -39293,21 +39306,6 @@
     void
     Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
-        ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);
-
-        data.read();
-        cmp.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
-                    = data[lane];
-                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
-                    = cmp[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU32>(gpuDynInst);
     } // initiateAcc

@@ -39364,11 +39362,20 @@
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

         addr.read();
+        data.read();

         calcAddr(gpuDynInst, addr);

+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
             gpuDynInst->computeUnit()->globalMemoryPipe.
                 issueRequest(gpuDynInst);
@@ -39387,17 +39394,6 @@
     void
     Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
-
-        data.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
-                    = data[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU32>(gpuDynInst);
     } // initiateAcc

@@ -39733,11 +39729,24 @@
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
+        ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);

         addr.read();
+        data.read();
+        cmp.read();

         calcAddr(gpuDynInst, addr);

+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
+                    = data[lane];
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
+                    = cmp[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
             gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
             /**
@@ -39765,21 +39774,6 @@
     void
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
-        ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);
-
-        data.read();
-        cmp.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
-                    = data[lane];
-                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
-                    = cmp[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU64>(gpuDynInst);
     } // initiateAcc

@@ -39837,10 +39831,20 @@
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

         addr.read();
+        data.read();

         calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
             gpuDynInst->computeUnit()->globalMemoryPipe.
                 issueRequest(gpuDynInst);
@@ -39859,17 +39863,6 @@
     void
     Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
-
-        data.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
-                    = data[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU64>(gpuDynInst);
     } // initiateAcc


--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/29926
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I1debae302f0b13f79ed2b7a9ed2f6b07fcec5128
Gerrit-Change-Number: 29926
Gerrit-PatchSet: 6
Gerrit-Owner: Anthony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Alexandru Duțu <alexandru.d...@amd.com>
Gerrit-Reviewer: Anthony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Tony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Tuan Ta <q...@cornell.edu>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to