changeset f1e198a028be in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=f1e198a028be
description:
cpu, arm: Distinguish Float* and SimdFloat*, create FloatMem* opClass
Modify the opClass assigned to AArch64 FP instructions from SimdFloat* to
Float*. Also create the FloatMemRead and FloatMemWrite opClasses, which
distinguish writes to the INT and FP register banks.
Change the latency of (Simd)FloatMultAcc to 5, based on the Cortex-A72,
where the "latency" of FMADD is 3 if the next instruction is an FMADD and
has only the augend-to-destination dependency; otherwise it is 7 cycles.
Signed-off-by: Jason Lowe-Power <[email protected]>
diffstat:
configs/common/O3_ARM_v7a.py | 12 +++-
src/arch/arm/isa/insts/fp64.isa | 91 ++++++++++++++++++++--------------------
src/arch/isa_parser.py | 16 ++++++-
src/cpu/FuncUnit.py | 6 +-
src/cpu/minor/MinorCPU.py | 7 +-
src/cpu/o3/FuncUnitConfig.py | 11 +++-
src/cpu/op_class.hh | 4 +
7 files changed, 88 insertions(+), 59 deletions(-)
diffs (truncated from 469 to 300 lines):
diff -r 612f75cf36a0 -r f1e198a028be configs/common/O3_ARM_v7a.py
--- a/configs/common/O3_ARM_v7a.py Fri Oct 14 10:37:38 2016 -0400
+++ b/configs/common/O3_ARM_v7a.py Sat Oct 15 14:58:45 2016 -0500
@@ -62,24 +62,28 @@
OpDesc(opClass='SimdFloatDiv', opLat=3),
OpDesc(opClass='SimdFloatMisc', opLat=3),
OpDesc(opClass='SimdFloatMult', opLat=3),
- OpDesc(opClass='SimdFloatMultAcc',opLat=1),
+ OpDesc(opClass='SimdFloatMultAcc',opLat=5),
OpDesc(opClass='SimdFloatSqrt', opLat=9),
OpDesc(opClass='FloatAdd', opLat=5),
OpDesc(opClass='FloatCmp', opLat=5),
OpDesc(opClass='FloatCvt', opLat=5),
OpDesc(opClass='FloatDiv', opLat=9, pipelined=False),
OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
- OpDesc(opClass='FloatMult', opLat=4) ]
+ OpDesc(opClass='FloatMult', opLat=4),
+ OpDesc(opClass='FloatMultAcc', opLat=5),
+ OpDesc(opClass='FloatMisc', opLat=3) ]
count = 2
# Load/Store Units
class O3_ARM_v7a_Load(FUDesc):
- opList = [ OpDesc(opClass='MemRead',opLat=2) ]
+ opList = [ OpDesc(opClass='MemRead',opLat=2),
+ OpDesc(opClass='FloatMemRead',opLat=2) ]
count = 1
class O3_ARM_v7a_Store(FUDesc):
- opList = [OpDesc(opClass='MemWrite',opLat=2) ]
+ opList = [ OpDesc(opClass='MemWrite',opLat=2),
+ OpDesc(opClass='FloatMemWrite',opLat=2) ]
count = 1
# Functional Units for this CPU
diff -r 612f75cf36a0 -r f1e198a028be src/arch/arm/isa/insts/fp64.isa
--- a/src/arch/arm/isa/insts/fp64.isa Fri Oct 14 10:37:38 2016 -0400
+++ b/src/arch/arm/isa/insts/fp64.isa Sat Oct 15 14:58:45 2016 -0500
@@ -52,7 +52,7 @@
'''
fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp",
{ "code": fmovImmSCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegImmOpDeclare.subst(fmovImmSIop);
decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop);
exec_output += BasicExecute.subst(fmovImmSIop);
@@ -65,7 +65,7 @@
'''
fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp",
{ "code": fmovImmDCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegImmOpDeclare.subst(fmovImmDIop);
decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop);
exec_output += BasicExecute.subst(fmovImmDIop);
@@ -78,7 +78,7 @@
'''
fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp",
{ "code": fmovRegSCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegSIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop);
exec_output += BasicExecute.subst(fmovRegSIop);
@@ -91,7 +91,7 @@
'''
fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp",
{ "code": fmovRegDCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegDIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop);
exec_output += BasicExecute.subst(fmovRegDIop);
@@ -104,7 +104,7 @@
'''
fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp",
{ "code": fmovCoreRegWCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop);
exec_output += BasicExecute.subst(fmovCoreRegWIop);
@@ -117,7 +117,7 @@
'''
fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp",
{ "code": fmovCoreRegXCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop);
exec_output += BasicExecute.subst(fmovCoreRegXIop);
@@ -128,7 +128,7 @@
'''
fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp",
{ "code": fmovUCoreRegXCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop);
exec_output += BasicExecute.subst(fmovUCoreRegXIop);
@@ -138,7 +138,7 @@
'''
fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp",
{ "code": fmovRegCoreWCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop);
exec_output += BasicExecute.subst(fmovRegCoreWIop);
@@ -148,7 +148,7 @@
'''
fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp",
{ "code": fmovRegCoreXCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop);
exec_output += BasicExecute.subst(fmovRegCoreXIop);
@@ -158,7 +158,7 @@
'''
fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp",
{ "code": fmovURegCoreXCode,
- "op_class": "SimdFloatMiscOp" }, [])
+ "op_class": "FloatMiscOp" }, [])
header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop);
exec_output += BasicExecute.subst(fmovURegCoreXIop);
@@ -270,16 +270,16 @@
decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop)
exec_output += BasicExecute.subst(iop)
- buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp",
+ buildTernaryFpOp("FMAdd", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)",
"fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" )
- buildTernaryFpOp("FMSub", "SimdFloatMultAccOp",
+ buildTernaryFpOp("FMSub", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1),
cOp2, fpscr)",
"fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1),
cOp2, fpscr)" )
- buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp",
+ buildTernaryFpOp("FNMAdd", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3),
fplibNeg<uint32_t>(cOp1), cOp2, fpscr)",
"fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3),
fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" )
- buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp",
+ buildTernaryFpOp("FNMSub", "FloatMultAccOp",
"fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1,
cOp2, fpscr)",
"fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1,
cOp2, fpscr)" )
@@ -304,31 +304,31 @@
decoder_output += constructorTempl.subst(iop)
exec_output += BasicExecute.subst(iop)
- buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp",
+ buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "FloatAddOp",
"fplibAdd<uint32_t>(cOp1, cOp2, fpscr)",
"fplibAdd<uint64_t>(cOp1, cOp2, fpscr)")
- buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp",
+ buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "FloatAddOp",
"fplibSub<uint32_t>(cOp1, cOp2, fpscr)",
"fplibSub<uint64_t>(cOp1, cOp2, fpscr)")
- buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp",
+ buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "FloatDivOp",
"fplibDiv<uint32_t>(cOp1, cOp2, fpscr)",
"fplibDiv<uint64_t>(cOp1, cOp2, fpscr)")
- buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp",
+ buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "FloatMultOp",
"fplibMul<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMul<uint64_t>(cOp1, cOp2, fpscr)")
- buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp",
+ buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "FloatMultOp",
"fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))",
"fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))")
- buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp",
+ buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "FloatCmpOp",
"fplibMin<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMin<uint64_t>(cOp1, cOp2, fpscr)")
- buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp",
+ buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "FloatCmpOp",
"fplibMax<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMax<uint64_t>(cOp1, cOp2, fpscr)")
- buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp",
+ buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "FloatCmpOp",
"fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)")
- buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp",
+ buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "FloatCmpOp",
"fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)",
"fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)")
@@ -354,7 +354,7 @@
decoder_output += constructorTempl.subst(iop)
exec_output += BasicExecute.subst(iop)
- buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp",
+ buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "FloatSqrtOp",
"fplibSqrt<uint32_t>(cOp1, fpscr)",
"fplibSqrt<uint64_t>(cOp1, fpscr)")
def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp,
@@ -383,29 +383,29 @@
decoder_output += constructorTempl.subst(iop)
exec_output += BasicExecute.subst(iop)
- buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "FloatMiscOp",
"fplibNeg<uint32_t>(cOp1)",
"fplibNeg<uint64_t>(cOp1)")
- buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "FloatMiscOp",
"fplibAbs<uint32_t>(cOp1)",
"fplibAbs<uint64_t>(cOp1)")
- buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN,
false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN,
false, fpscr)")
- buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF,
false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF,
false, fpscr)")
- buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF,
false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF,
false, fpscr)")
- buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO,
false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO,
false, fpscr)")
- buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY,
false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY,
false, fpscr)")
- buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr),
false, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr),
false, fpscr)")
- buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp",
+ buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "FloatMiscOp",
"fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr),
true, fpscr)",
"fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr),
true, fpscr)")
}};
@@ -451,8 +451,8 @@
instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else
"S")
mnem = "%scvtf" %(us.lower())
fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp",
- { "code": fcvtIntFpDCode,
- "op_class": "SimdFloatCvtOp"
}, [])
+ { "code": fcvtIntFpDCode,
+ "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop);
decoder_output +=
AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop);
exec_output += BasicExecute.subst(fcvtIntFpDIop);
@@ -491,7 +491,7 @@
mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u")
fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp",
{ "code": fcvtFpIntCode,
- "op_class": "SimdFloatCvtOp" }, [])
+ "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop);
decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop);
exec_output += BasicExecute.subst(fcvtFpIntIop);
@@ -514,7 +514,7 @@
'''
fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp",
{ "code": fcvtFpSFpDCode,
- "op_class": "SimdFloatCvtOp" }, [])
+ "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop);
exec_output += BasicExecute.subst(fcvtFpSFpDIop);
@@ -531,7 +531,7 @@
'''
fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp",
{"code": fcvtFpDFpSCode,
- "op_class": "SimdFloatCvtOp" }, [])
+ "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop);
exec_output += BasicExecute.subst(fcvtFpDFpSIop);
@@ -563,7 +563,7 @@
instName = "FcvtFpHFp%s" %("D" if isDouble else "S")
fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp",
{ "code": code,
- "op_class": "SimdFloatCvtOp" }, [])
+ "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop);
exec_output += BasicExecute.subst(fcvtFpHFpIop);
@@ -586,7 +586,7 @@
instName = "FcvtFp%sFpH" %("D" if isDouble else "S")
fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp",
{ "code": code,
- "op_class": "SimdFloatCvtOp" }, [])
+ "op_class": "FloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop);
decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop);
exec_output += BasicExecute.subst(fcvtFpFpHIop);
@@ -626,7 +626,7 @@
fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName,
"FpReg%sOp" %(typeName),
{"code": fcmpCode,
- "op_class": "SimdFloatCmpOp"}, [])
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev