[gem5-dev] Change in gem5/gem5[develop]: arch-arm: Fix SVE indx inst by sizeof error and dest overwrite

2020-05-07 Thread Jordi Vaquero (Gerrit) via gem5-dev
Jordi Vaquero has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/28228 )


Change subject: arch-arm: Fix SVE indx inst by sizeof error and dest  
overwrite

..

arch-arm: Fix SVE indx inst by sizeof error and dest overwrite

This patch includes two fixes for SVE FMUL; FMLA FMLS AND FCMLA instructions

+ Fixes indexed functions like FMUL, FMLA, FMLS, FCMLA due to its
destination register overwrite with temporary values, wince the imm
can make changes in vector positions that will be read in the future.

+ sizeof return bytes not bits so division of 128 shouild be of 16 instead

Change-Id: I304d1b254a299069c85bbc3319e5a6d4119436d0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28228
Reviewed-by: Giacomo Travaglini 
Maintainer: Giacomo Travaglini 
Tested-by: kokoro 
---
M src/arch/arm/isa/insts/sve.isa
1 file changed, 33 insertions(+), 21 deletions(-)

Approvals:
  Giacomo Travaglini: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 16597d6..9314ba9 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -1835,26 +1835,25 @@
 xc->tcBase());

 // Number of elements in a 128 bit segment
-constexpr unsigned ePerSegment = 128 / sizeof(Element);
+constexpr unsigned ePerSegment = 16 / sizeof(Element);

-'''
-
-code += '''
+ArmISA::VecRegContainer tmpC;
+auto auxDest = tmpC.as();
 for (unsigned i = 0; i < eCount; i++) {
-const auto segmentBase = i - i % ePerSegment;
-const auto segmentIdx = segmentBase + index;
+const auto segmentBase = i - i %% ePerSegment;
+const auto segmentIdx = segmentBase + index;

-const Element& srcElem1 = AA64FpOp1_x[i];
-const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
-Element destElem = 0;
+const Element& srcElem1 = AA64FpOp1_x[i];
+const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
+Element destElem = 0;

-'''
-
-code += '''
-%(op)s
-AA64FpDest_x[i] = destElem;
+%(op)s
+auxDest[i] = destElem;
 }
-''' % {'op': op}
+
+for (unsigned i = 0; i < eCount; i++) {
+AA64FpDest_x[i] = auxDest[i];
+}''' % {'op':op}

 baseClass = 'SveBinIdxUnpredOp'

@@ -2067,8 +2066,10 @@
 xc->tcBase());

 // Number of elements in a 128 bit segment
-constexpr unsigned ePerSegment = 128 / sizeof(Element);
+constexpr unsigned ePerSegment = 16 / sizeof(Element);

+ArmISA::VecRegContainer tmpC;
+auto auxDest = tmpC.as();
 for (unsigned i = 0; i < eCount; i++) {
 const auto segmentBase = i - i % ePerSegment;
 const auto segmentIdx = segmentBase + index;
@@ -2077,10 +2078,13 @@
 const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
 Element destElem = AA64FpDestMerge_x[i];
 '''
-
 code += '''
 %(op)s
-AA64FpDest_x[i] = destElem;
+auxDest[i] = destElem;
+}
+
+for (unsigned i = 0; i < eCount; i++) {
+AA64FpDest_x[i] = auxDest[i];
 }''' % {'op': op}

 iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp',
@@ -3024,6 +3028,9 @@
 code += '''
 uint32_t eltspersegment = 16 / (2 * sizeof(Element));'''
 code += '''
+ArmISA::VecRegContainer tmpC;
+auto auxDest = tmpC.as();
+
 for (int i = 0; i < eCount / 2; ++i) {'''
 if predType == PredType.NONE:
 code += '''
@@ -3067,9 +3074,14 @@
 code += '''
 }'''
 code += '''
-AA64FpDest_x[2 * i] = addend_r;
-AA64FpDest_x[2 * i + 1] = addend_i;
-}'''
+auxDest[2 * i] = addend_r;
+auxDest[2 * i + 1] = addend_i;
+}
+
+for (unsigned i = 0; i < eCount; i++) {
+AA64FpDest_x[i] = auxDest[i];
+}
+'''
 iop = InstObjParams(name, 'Sve' + Name,
 'SveComplexIdxOp' if predType == PredType.NONE
   else 'SveComplexOp',

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/28228
To unsubscribe, or for help writing mail filters, visit  
https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I304d1b254a299069c85bbc3319e5a6d4119436d0
Gerrit-Change-Number: 28228
Gerrit-PatchSet: 4
Gerrit-Owner: Jordi Vaquero 
Gerrit-Reviewer: Giacomo Travaglini 
Gerrit-Reviewer: Jordi Vaquero 
Gerrit-Reviewer: kokoro 
Gerrit-MessageType: merged
___
gem5-dev 

[gem5-dev] Change in gem5/gem5[develop]: arch-arm: Fix SVE indx inst by sizeof error and dest overwrite

2020-04-27 Thread Jordi Vaquero (Gerrit) via gem5-dev
Jordi Vaquero has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/28228 )



Change subject: arch-arm: Fix SVE indx inst by sizeof error and dest  
overwrite

..

arch-arm: Fix SVE indx inst by sizeof error and dest overwrite

This patch includes two fixes for SVE FMUL; FMLA FMLS AND FCMLA instructions

+ Fixes indexed functions like FMUL, FMLA, FMLS, FCMLA due to its
destination register overwrite with temporary values, wince the imm
can make changes in vector positions that will be read in the future.

+ sizeof return bytes not bits so division of 128 shouild be of 16 instead

Change-Id: I304d1b254a299069c85bbc3319e5a6d4119436d0
---
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/insts/sve.isa
2 files changed, 30 insertions(+), 12 deletions(-)



diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa  
b/src/arch/arm/isa/formats/sve_2nd_level.isa

index b6f8340..53fd80d 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2799,12 +2799,12 @@
 case 2:
 zm = (IntRegIndex) (uint8_t) bits(machInst, 18, 16);
 imm = bits(machInst, 20, 19);
-return new SveFcmlai(machInst,
+return new SveFcmlai(machInst,
 zda, zn, zm, rot, imm);
 case 3:
 zm = (IntRegIndex) (uint8_t) bits(machInst, 19, 16);
 imm = bits(machInst, 20);
-return new SveFcmlai(machInst,
+return new SveFcmlai(machInst,
 zda, zn, zm, rot, imm);
 }
 return new Unknown64(machInst);
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index b4c7fe5..870ecfd 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -1813,11 +1813,13 @@
 xc->tcBase());

 // Number of elements in a 128 bit segment
-constexpr unsigned ePerSegment = 128 / sizeof(Element);
+constexpr unsigned ePerSegment = 16 / sizeof(Element);

 '''

 code += '''
+ArmISA::VecRegContainer tmpC;
+auto auxDest = tmpC.as();
 for (unsigned i = 0; i < eCount; i++) {
 const auto segmentBase = i - i % ePerSegment;
 const auto segmentIdx = segmentBase + index;
@@ -1830,9 +1832,12 @@

 code += '''
 %(op)s
-AA64FpDest_x[i] = destElem;
+auxDest[i] = destElem;
 }
-''' % {'op': op}
+
+for (unsigned i = 0; i < eCount; i++) {
+AA64FpDest_x[i] = auxDest[i];
+}''' % {'op': op}

 baseClass = 'SveBinIdxUnpredOp'

@@ -2045,8 +2050,10 @@
 xc->tcBase());

 // Number of elements in a 128 bit segment
-constexpr unsigned ePerSegment = 128 / sizeof(Element);
+constexpr unsigned ePerSegment = 16 / sizeof(Element);

+ArmISA::VecRegContainer tmpC;
+auto auxDest = tmpC.as();
 for (unsigned i = 0; i < eCount; i++) {
 const auto segmentBase = i - i % ePerSegment;
 const auto segmentIdx = segmentBase + index;
@@ -2055,10 +2062,13 @@
 const Element& srcElem2 = AA64FpOp2_x[segmentIdx];
 Element destElem = AA64FpDestMerge_x[i];
 '''
-
 code += '''
 %(op)s
-AA64FpDest_x[i] = destElem;
+auxDest[i] = destElem;
+}
+
+for (unsigned i = 0; i < eCount; i++) {
+AA64FpDest_x[i] = auxDest[i];
 }''' % {'op': op}

 iop = InstObjParams(name, 'Sve' + Name, 'SveBinIdxUnpredOp',
@@ -2992,6 +3002,9 @@
 code += '''
 uint32_t eltspersegment = 16 / (2 * sizeof(Element));'''
 code += '''
+ArmISA::VecRegContainer tmpC;
+auto auxDest = tmpC.as();
+
 for (int i = 0; i < eCount / 2; ++i) {'''
 if predType == PredType.NONE:
 code += '''
@@ -3035,9 +3048,14 @@
 code += '''
 }'''
 code += '''
-AA64FpDest_x[2 * i] = addend_r;
-AA64FpDest_x[2 * i + 1] = addend_i;
-}'''
+auxDest[2 * i] = addend_r;
+auxDest[2 * i + 1] = addend_i;
+}
+
+for (unsigned i = 0; i < eCount; i++) {
+AA64FpDest_x[i] = auxDest[i];
+}
+'''
 iop = InstObjParams(name, 'Sve' + Name,
 'SveComplexIdxOp' if predType == PredType.NONE
   else 'SveComplexOp',
@@ -3558,7 +3576,7 @@
 sveCmpInst('fcmuo', 'Fcmuo', 'SimdFloatCmpOp', fpTypes, fcmuoCode)
 # FCMLA (indexed)
 sveComplexMulAddInst('fcmla', 'Fcmlai', 'SimdFloatMultAccOp',
-fpTypes[1:], predType = PredType.NONE)
+fpTypes[:2], predType = PredType.NONE)
 # FCMLA (vectors)
 sveComplexMulAddInst('fcmla',