Hello Nikos Nikoleris,

I'd like you to do a code review. Please visit

    https://gem5-review.googlesource.com/c/public/gem5/+/28108

to review the following change.


Change subject: arch-arm: SVE instructions do not use AHP format
......................................................................

arch-arm: SVE instructions do not use AHP format

SVE half-precision floating-point instructions support only IEEE
754-2008 half-precision format and ignore the value of the FPCR.AHP bit,
behaving as if it has an Effective value of 0.

This patch addresses this by masking the FPSCR.AHP bit before
passing it to fplib.

Change-Id: I1432fc3f7fefb81445fe042ae7d681f5cec40e64
Signed-off-by: Giacomo Travaglini <giacomo.travagl...@arm.com>
Reviewed-by: Nikos Nikoleris <nikos.nikole...@arm.com>
---
M src/arch/arm/isa/insts/sve.isa
M src/arch/arm/miscregs.hh
2 files changed, 11 insertions(+), 9 deletions(-)



diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index b4c7fe5..deb12bc 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -2949,7 +2949,7 @@
                 if (sub_i) {
                     elt2_i = fplibNeg<Element>(elt2_i);
                 }
-                fpscr =  (FPSCR) FpscrExc;
+                fpscr = FpscrExc & ~FpscrAhpMask;
                 acc_r = fplibAdd<Element>(acc_r, elt2_i, fpscr);
                 FpscrExc = fpscr;
             }
@@ -2957,7 +2957,7 @@
                 if (sub_r) {
                     elt2_r = fplibNeg<Element>(elt2_r);
                 }
-                fpscr =  (FPSCR) FpscrExc;
+                fpscr = FpscrExc & ~FpscrAhpMask;
                 acc_i = fplibAdd<Element>(acc_i, elt2_r, fpscr);
                 FpscrExc = fpscr;
             }
@@ -3015,7 +3015,7 @@
             if (neg_r) {
                 elt2_a = fplibNeg<Element>(elt2_a);
             }
-            fpscr =  (FPSCR) FpscrExc;
+            fpscr = FpscrExc & ~FpscrAhpMask;
addend_r = fplibMulAdd<Element>(addend_r, elt1_a, elt2_a, fpscr);
             FpscrExc = fpscr;'''
         if predType != PredType.NONE:
@@ -3028,7 +3028,7 @@
             if (neg_i) {
                 elt2_b = fplibNeg<Element>(elt2_b);
             }
-            fpscr =  (FPSCR) FpscrExc;
+            fpscr = FpscrExc & ~FpscrAhpMask;
addend_i = fplibMulAdd<Element>(addend_i, elt1_a, elt2_b, fpscr);
             FpscrExc = fpscr;'''
         if predType != PredType.NONE:
@@ -3466,7 +3466,7 @@
     sveExtInst('ext', 'Ext', 'SimdAluOp')
     # FABD
     fpOp = '''
-            FPSCR fpscr = (FPSCR) FpscrExc;
+            FPSCR fpscr = FpscrExc & ~FpscrAhpMask;
             destElem = %s;
             FpscrExc = fpscr;
     '''
@@ -3497,7 +3497,7 @@
sveBinInst('fadd', 'FaddUnpred', 'SimdFloatAddOp', floatTypes, faddCode)
     # FADDA
     fpAddaOp = '''
-            FPSCR fpscr = (FPSCR) FpscrExc;
+            FPSCR fpscr = FpscrExc & ~FpscrAhpMask;
             destElem = fplibAdd<Element>(destElem, srcElem1, fpscr);
             FpscrExc = FpscrExc | fpscr;
     '''
@@ -3505,7 +3505,7 @@
             fpAddaOp)
     # FADDV
     fpReduceOp = '''
-            FPSCR fpscr = (FPSCR) FpscrExc;
+            FPSCR fpscr = FpscrExc & ~FpscrAhpMask;
             destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
             FpscrExc = FpscrExc | fpscr;
     '''
diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh
index 3900a4c..550b51c 100644
--- a/src/arch/arm/miscregs.hh
+++ b/src/arch/arm/miscregs.hh
@@ -1936,10 +1936,12 @@
// This mask selects bits of the FPSCR that actually go in the FpCondCodes
     // integer register to allow renaming.
     static const uint32_t FpCondCodesMask = 0xF0000000;
-    // This mask selects the cumulative FP exception flags of the FPSCR.
-    static const uint32_t FpscrExcMask = 0x0000009F;
     // This mask selects the cumulative saturation flag of the FPSCR.
     static const uint32_t FpscrQcMask = 0x08000000;
+    // This mask selects the AHP bit of the FPSCR.
+    static const uint32_t FpscrAhpMask = 0x04000000;
+    // This mask selects the cumulative FP exception flags of the FPSCR.
+    static const uint32_t FpscrExcMask = 0x0000009F;

     /**
      * Check for permission to read coprocessor registers.

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/28108
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I1432fc3f7fefb81445fe042ae7d681f5cec40e64
Gerrit-Change-Number: 28108
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Nikos Nikoleris <nikos.nikole...@arm.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to