Boris Shingarov has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/40903 )
Change subject: arch-power: Add doubleword multiply instructions
..
arch-power: Add doubleword multiply instructions
This introduces 128-bit multiplication helpers and adds
the following instructions.
* Multiply Low Doubleword (mulld[o][.])
* Multiply High Doubleword (mulhd[.])
* Multiply High Doubleword Unsigned (mulhdu[.])
Change-Id: Id579c95468ffe5fe7b5164579ec1dfb18f0b3ab3
Signed-off-by: Sandipan Das
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/40903
Reviewed-by: Boris Shingarov
Maintainer: Jason Lowe-Power
Tested-by: kokoro
---
M src/arch/power/insts/integer.hh
M src/arch/power/isa/decoder.isa
2 files changed, 85 insertions(+), 16 deletions(-)
Approvals:
Boris Shingarov: Looks good to me, approved
Jason Lowe-Power: Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/arch/power/insts/integer.hh b/src/arch/power/insts/integer.hh
index aafbbec..95f1598 100644
--- a/src/arch/power/insts/integer.hh
+++ b/src/arch/power/insts/integer.hh
@@ -134,6 +134,49 @@
{
}
+/**
+ * Compute 128-bit product of 64-bit unsigned integer multiplication
+ * based on https://stackoverflow.com/a/28904636
+ */
+inline std::tuple<uint64_t, uint64_t>
+multiply(uint64_t ra, uint64_t rb) const
+{
+uint64_t plo, phi;
+#if defined(__SIZEOF_INT128__)
+__uint128_t prod = (__uint128_t)ra * rb;
+plo = prod;
+phi = prod >> 64;
+#else
+uint64_t ralo = (uint32_t)ra, rahi = ra >> 32;
+uint64_t rblo = (uint32_t)rb, rbhi = rb >> 32;
+uint64_t pp0 = ralo * rblo;
+uint64_t pp1 = rahi * rblo;
+uint64_t pp2 = ralo * rbhi;
+uint64_t pp3 = rahi * rbhi;
+uint64_t c = ((uint32_t)pp1) + ((uint32_t)pp2) + (pp0 >> 32);
+phi = pp3 + (pp2 >> 32) + (pp1 >> 32) + (c >> 32);
+plo = (c << 32) | ((uint32_t)pp0);
+#endif
+return std::make_tuple(plo, phi);
+}
+
+/* Compute 128-bit product of 64-bit signed integer multiplication */
+inline std::tuple<uint64_t, int64_t>
+multiply(int64_t ra, int64_t rb) const
+{
+uint64_t plo, phi;
+#if defined(__SIZEOF_INT128__)
+__int128_t prod = (__int128_t)ra * rb;
+plo = prod;
+phi = prod >> 64;
+#else
+std::tie(plo, phi) = multiply((uint64_t)ra, (uint64_t)rb);
+if (rb < 0) phi -= (uint64_t)ra;
+if (ra < 0) phi -= (uint64_t)rb;
+#endif
+return std::make_tuple(plo, (int64_t)phi);
+}
+
std::string generateDisassembly(
Addr pc, const Loader::SymbolTable *symtab) const override;
};
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index beacd6f..b4c90fc 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -483,10 +483,15 @@
// These instructions are of XO form with bit 21 as the OE bit.
default: decode XO_XO {
-format IntSumOp {
-8: subfc({{ ~Ra }}, {{ Rb }}, {{ 1 }}, true);
-10: addc({{ Ra }}, {{ Rb }}, computeCA = true);
-}
+8: IntSumOp::subfc({{ ~Ra }}, {{ Rb }}, {{ 1 }}, true);
+
+9: IntArithCheckRcOp::mulhdu({{
+uint64_t res;
+std::tie(std::ignore, res) = multiply(Ra, Rb);
+Rt = res;
+}});
+
+10: IntSumOp::addc({{ Ra }}, {{ Rb }}, computeCA = true);
11: IntArithCheckRcOp::mulhwu({{
uint64_t res = (uint64_t)Ra_uw * Rb_uw;
@@ -496,11 +501,19 @@
40: IntSumOp::subf({{ ~Ra }}, {{ Rb }}, {{ 1 }});
-75: IntArithCheckRcOp::mulhw({{
-uint64_t res = (int64_t)Ra_sw * Rb_sw;
-res = res >> 32;
-Rt = res;
-}});
+format IntArithCheckRcOp {
+73: mulhd({{
+int64_t res;
+std::tie(std::ignore, res) = multiply(Ra_sd, Rb_sd);
+Rt = res;
+}});
+
+75: mulhw({{
+uint64_t res = (int64_t)Ra_sw * Rb_sw;
+res = res >> 32;
+Rt = res;
+}});
+}
format IntSumOp {
104: neg({{ ~Ra }}, {{ 1 }});
@@ -512,13 +525,26 @@
234: addme({{ Ra }}, {{ -1ULL }}, {{ xer.ca }}, true);
}
-235: IntArithCheckRcOp::mullw({{
-int64_t res = (int64_t)Ra_sw * Rb_sw;
-if (res != (int32_t)res) {
-setOV = true;
-}
-Rt = res;
-}}, true);
+format IntArithCheckRcOp {
+233: mulld({{
+int64_t src1 = Ra_sd;
+int64_t src2 = Rb_sd;
+