changeset 9b880b40ac10 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=9b880b40ac10
description:
        O3: Make all instructions that write a misc. register not perform the
        write until commit.

        ARM instructions updating cumulative flags (ARM FP exceptions and
        saturation flags) are not serialized.

        Added aliases for ARM FP exceptions and saturation flags in FPSCR.
        Removed write accesses to the FP condition codes for most ARM VFP
        instructions: only VCMP and VCMPE instructions update the FP condition
        codes.  Removed a potential cause of seg. faults in the O3 model for
        NEON memory macro-ops (ARM).

diffstat:

 src/arch/alpha/isa/fp.isa           |    2 +-
 src/arch/alpha/locked_mem.hh        |   10 +-
 src/arch/arm/isa.cc                 |   17 ++-
 src/arch/arm/isa/insts/fp.isa       |  233 +++++++++++++++++++----------------
 src/arch/arm/isa/insts/misc.isa     |    4 +-
 src/arch/arm/isa/insts/neon.isa     |  220 ++++++++++++++++----------------
 src/arch/arm/isa/operands.isa       |    2 +
 src/arch/arm/miscregs.hh            |   11 +-
 src/arch/mips/locked_mem.hh         |   10 +-
 src/cpu/base_dyn_inst.hh            |    3 +-
 src/cpu/inorder/inorder_dyn_inst.cc |   52 +------
 src/cpu/o3/commit_impl.hh           |   15 ++
 src/cpu/o3/dyn_inst.hh              |   83 +++++++-----
 src/cpu/o3/dyn_inst_impl.hh         |   14 ++
 src/cpu/simple/base.hh              |   17 --
 15 files changed, 361 insertions(+), 332 deletions(-)

diffs (truncated from 2366 to 300 lines):

diff -r 9b87755cb699 -r 9b880b40ac10 src/arch/alpha/isa/fp.isa
--- a/src/arch/alpha/isa/fp.isa Tue Dec 07 16:19:57 2010 -0800
+++ b/src/arch/alpha/isa/fp.isa Tue Dec 07 16:19:57 2010 -0800
@@ -229,7 +229,7 @@
             %(code)s;
         } else {
             m5_fesetround(getC99RoundingMode(
-                           xc->readMiscRegNoEffect(MISCREG_FPCR)));
+                           xc->readMiscReg(MISCREG_FPCR)));
             %(code)s;
             m5_fesetround(M5_FE_TONEAREST);
         }
diff -r 9b87755cb699 -r 9b880b40ac10 src/arch/alpha/locked_mem.hh
--- a/src/arch/alpha/locked_mem.hh      Tue Dec 07 16:19:57 2010 -0800
+++ b/src/arch/alpha/locked_mem.hh      Tue Dec 07 16:19:57 2010 -0800
@@ -55,8 +55,8 @@
 inline void
 handleLockedRead(XC *xc, Request *req)
 {
-    xc->setMiscRegNoEffect(MISCREG_LOCKADDR, req->getPaddr() & ~0xf);
-    xc->setMiscRegNoEffect(MISCREG_LOCKFLAG, true);
+    xc->setMiscReg(MISCREG_LOCKADDR, req->getPaddr() & ~0xf);
+    xc->setMiscReg(MISCREG_LOCKFLAG, true);
 }
 
 
@@ -70,13 +70,13 @@
         req->setExtraData(2);
     } else {
         // standard store conditional
-        bool lock_flag = xc->readMiscRegNoEffect(MISCREG_LOCKFLAG);
-        Addr lock_addr = xc->readMiscRegNoEffect(MISCREG_LOCKADDR);
+        bool lock_flag = xc->readMiscReg(MISCREG_LOCKFLAG);
+        Addr lock_addr = xc->readMiscReg(MISCREG_LOCKADDR);
         if (!lock_flag || (req->getPaddr() & ~0xf) != lock_addr) {
             // Lock flag not set or addr mismatch in CPU;
             // don't even bother sending to memory system
             req->setExtraData(0);
-            xc->setMiscRegNoEffect(MISCREG_LOCKFLAG, false);
+            xc->setMiscReg(MISCREG_LOCKFLAG, false);
             // the rest of this code is not architectural;
             // it's just a debugging aid to help detect
             // livelock by warning on long sequences of failed
diff -r 9b87755cb699 -r 9b880b40ac10 src/arch/arm/isa.cc
--- a/src/arch/arm/isa.cc       Tue Dec 07 16:19:57 2010 -0800
+++ b/src/arch/arm/isa.cc       Tue Dec 07 16:19:57 2010 -0800
@@ -202,7 +202,10 @@
         warn("Not doing anyhting for read to miscreg %s\n",
                 miscRegName[misc_reg]);
         break;
-
+      case MISCREG_FPSCR_QC:
+        return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrQcMask;
+      case MISCREG_FPSCR_EXC:
+        return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrExcMask;
     }
     return readMiscRegNoEffect(misc_reg);
 }
@@ -304,6 +307,18 @@
                          (miscRegs[MISCREG_FPSCR] & ~(uint32_t)fpscrMask);
             }
             break;
+          case MISCREG_FPSCR_QC:
+            {
+                newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrQcMask);
+                misc_reg = MISCREG_FPSCR;
+            }
+            break;
+          case MISCREG_FPSCR_EXC:
+            {
+                newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrExcMask);
+                misc_reg = MISCREG_FPSCR;
+            }
+            break;
           case MISCREG_FPEXC:
             {
                 const uint32_t fpexcMask = 0x60000000;
diff -r 9b87755cb699 -r 9b880b40ac10 src/arch/arm/isa/insts/fp.isa
--- a/src/arch/arm/isa/insts/fp.isa     Tue Dec 07 16:19:57 2010 -0800
+++ b/src/arch/arm/isa/insts/fp.isa     Tue Dec 07 16:19:57 2010 -0800
@@ -208,7 +208,8 @@
     vmsrFpscrIop = InstObjParams("vmsr", "VmsrFpscr", "FpRegRegOp",
                                  { "code": vmsrFpscrCode,
                                    "predicate_test": predicateTest,
-                                   "op_class": "SimdFloatMiscOp" }, [])
+                                   "op_class": "SimdFloatMiscOp" },
+                                 ["IsSerializeAfter","IsNonSpeculative"])
     header_output += FpRegRegOpDeclare.subst(vmsrFpscrIop);
     decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop);
     exec_output += PredOpExecute.subst(vmsrFpscrIop);
@@ -217,7 +218,8 @@
                             { "code": vmrsEnabledCheckCode + \
                                     "Dest = MiscOp1;",
                               "predicate_test": predicateTest,
-                              "op_class": "SimdFloatMiscOp" }, [])
+                              "op_class": "SimdFloatMiscOp" },
+                            ["IsSerializeBefore"])
     header_output += FpRegRegOpDeclare.subst(vmrsIop);
     decoder_output += FpRegRegOpConstructor.subst(vmrsIop);
     exec_output += PredOpExecute.subst(vmrsIop);
@@ -226,7 +228,8 @@
                                  { "code": vmrsEnabledCheckCode + \
                                            "Dest = Fpscr | FpCondCodes;",
                                    "predicate_test": predicateTest,
-                                   "op_class": "SimdFloatMiscOp" }, [])
+                                   "op_class": "SimdFloatMiscOp" },
+                                 ["IsSerializeBefore"])
     header_output += FpRegRegOpDeclare.subst(vmrsFpscrIop);
     decoder_output += FpRegRegOpConstructor.subst(vmrsFpscrIop);
     exec_output += PredOpExecute.subst(vmrsFpscrIop);
@@ -237,7 +240,8 @@
     vmrsApsrIop = InstObjParams("vmrs", "VmrsApsr", "FpRegRegImmOp",
                                 { "code": vmrsApsrCode,
                                   "predicate_test": predicateTest,
-                                  "op_class": "SimdFloatMiscOp" }, [])
+                                  "op_class": "SimdFloatMiscOp" },
+                                ["IsSerializeBefore"])
     header_output += FpRegRegImmOpDeclare.subst(vmrsApsrIop);
     decoder_output += FpRegRegImmOpConstructor.subst(vmrsApsrIop);
     exec_output += PredOpExecute.subst(vmrsApsrIop);
@@ -249,7 +253,8 @@
     vmrsApsrFpscrIop = InstObjParams("vmrs", "VmrsApsrFpscr", "FpRegRegImmOp",
                                      { "code": vmrsApsrFpscrCode,
                                        "predicate_test": predicateTest,
-                                       "op_class": "SimdFloatMiscOp" }, [])
+                                       "op_class": "SimdFloatMiscOp" },
+                                     ["IsSerializeBefore"])
     header_output += FpRegRegImmOpDeclare.subst(vmrsApsrFpscrIop);
     decoder_output += FpRegRegImmOpConstructor.subst(vmrsApsrFpscrIop);
     exec_output += PredOpExecute.subst(vmrsApsrFpscrIop);
@@ -451,20 +456,22 @@
     decoder_output = ""
     exec_output = ""
 
-    singleCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+    singleSimpleCode = vfpEnabledCheckCode + '''
+        FPSCR fpscr = (FPSCR) FpscrExc;
         FpDest = %(op)s;
-        FpCondCodes = fpscr & FpCondCodesMask;
+    '''
+    singleCode = singleSimpleCode + '''
+        FpscrExc = fpscr;
     '''
     singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \
                 "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
     singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)"
     doubleCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         double dest = %(op)s;
-        FpCondCodes = fpscr & FpCondCodesMask;
         FpDestP0.uw = dblLow(dest);
         FpDestP1.uw = dblHi(dest);
+        FpscrExc = fpscr;
     '''
     doubleBinOp = '''
         binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
@@ -545,7 +552,7 @@
         global header_output, decoder_output, exec_output
 
         sIop = InstObjParams(name + "s", Name + "S", base,
-                { "code": singleCode % { "op": singleOp },
+                { "code": singleSimpleCode % { "op": singleOp },
                   "predicate_test": predicateTest,
                   "op_class": opClass }, [])
         dIop = InstObjParams(name + "d", Name + "D", base,
@@ -574,12 +581,12 @@
     exec_output = ""
 
     vmlaSCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         float mid = binaryOp(fpscr, FpOp1, FpOp2,
                 fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
         FpDest = binaryOp(fpscr, FpDest, mid, fpAddS,
                 fpscr.fz, fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
+        FpscrExc = fpscr;
     '''
     vmlaSIop = InstObjParams("vmlas", "VmlaS", "FpRegRegRegOp",
                                      { "code": vmlaSCode,
@@ -590,16 +597,16 @@
     exec_output += PredOpExecute.subst(vmlaSIop);
 
     vmlaDCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
                                      dbl(FpOp2P0.uw, FpOp2P1.uw),
                                      fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
         double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
                                       mid, fpAddD, fpscr.fz,
                                       fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
         FpDestP0.uw = dblLow(dest);
         FpDestP1.uw = dblHi(dest);
+        FpscrExc = fpscr;
     '''
     vmlaDIop = InstObjParams("vmlad", "VmlaD", "FpRegRegRegOp",
                                      { "code": vmlaDCode,
@@ -610,12 +617,12 @@
     exec_output += PredOpExecute.subst(vmlaDIop);
 
     vmlsSCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         float mid = binaryOp(fpscr, FpOp1, FpOp2,
                 fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
         FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS,
                 fpscr.fz, fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
+        FpscrExc = fpscr;
     '''
     vmlsSIop = InstObjParams("vmlss", "VmlsS", "FpRegRegRegOp",
                                      { "code": vmlsSCode,
@@ -626,16 +633,16 @@
     exec_output += PredOpExecute.subst(vmlsSIop);
 
     vmlsDCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
                                      dbl(FpOp2P0.uw, FpOp2P1.uw),
                                      fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
         double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
                                       -mid, fpAddD, fpscr.fz,
                                       fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
         FpDestP0.uw = dblLow(dest);
         FpDestP1.uw = dblHi(dest);
+        FpscrExc = fpscr;
     '''
     vmlsDIop = InstObjParams("vmlsd", "VmlsD", "FpRegRegRegOp",
                                      { "code": vmlsDCode,
@@ -646,12 +653,12 @@
     exec_output += PredOpExecute.subst(vmlsDIop);
 
     vnmlaSCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         float mid = binaryOp(fpscr, FpOp1, FpOp2,
                 fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
         FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS,
                 fpscr.fz, fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
+        FpscrExc = fpscr;
     '''
     vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "FpRegRegRegOp",
                                      { "code": vnmlaSCode,
@@ -662,16 +669,16 @@
     exec_output += PredOpExecute.subst(vnmlaSIop);
 
     vnmlaDCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
                                      dbl(FpOp2P0.uw, FpOp2P1.uw),
                                      fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
         double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
                                       -mid, fpAddD, fpscr.fz,
                                       fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
         FpDestP0.uw = dblLow(dest);
         FpDestP1.uw = dblHi(dest);
+        FpscrExc = fpscr;
     '''
     vnmlaDIop = InstObjParams("vnmlad", "VnmlaD", "FpRegRegRegOp",
                                      { "code": vnmlaDCode,
@@ -682,12 +689,12 @@
     exec_output += PredOpExecute.subst(vnmlaDIop);
 
     vnmlsSCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         float mid = binaryOp(fpscr, FpOp1, FpOp2,
                 fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
         FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS,
                 fpscr.fz, fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
+        FpscrExc = fpscr;
     '''
     vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "FpRegRegRegOp",
                               { "code": vnmlsSCode,
@@ -698,16 +705,16 @@
     exec_output += PredOpExecute.subst(vnmlsSIop);
 
     vnmlsDCode = vfpEnabledCheckCode + '''
-        FPSCR fpscr = Fpscr | FpCondCodes;
+        FPSCR fpscr = (FPSCR) FpscrExc;
         double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
                                      dbl(FpOp2P0.uw, FpOp2P1.uw),
                                      fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
         double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
                                       mid, fpAddD, fpscr.fz,
                                       fpscr.dn, fpscr.rMode);
-        FpCondCodes = fpscr & FpCondCodesMask;
         FpDestP0.uw = dblLow(dest);
         FpDestP1.uw = dblHi(dest);
+        FpscrExc = fpscr;
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to