Sandipan Das has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/40898 )

Change subject: arch-power: Refactor arithmetic instructions
......................................................................

arch-power: Refactor arithmetic instructions

This changes the base classes for integer arithmetic
instructions and introduces two new classes that are used
to distinguish between instructions using register and
immediate operands.

Decoding has also been consolidated using formats that can
generate code after determining if an instruction records
carry and overflow and also if it records the nature of the
result, i.e. less than, greater than or equal to zero.
However, for multiply and divide instructions, the code to
determine if an overflow has occurred has been moved to the
instruction definition itself. The formats have also been
updated to make use of the new base classes.

Change-Id: I23d70ac4bad4d25d876308db0b3564c092bf574c
Signed-off-by: Sandipan Das <sandi...@linux.ibm.com>
---
M src/arch/power/insts/integer.hh
M src/arch/power/isa/decoder.isa
M src/arch/power/isa/formats/integer.isa
3 files changed, 142 insertions(+), 94 deletions(-)



diff --git a/src/arch/power/insts/integer.hh b/src/arch/power/insts/integer.hh
index d81f98d..9efda43 100644
--- a/src/arch/power/insts/integer.hh
+++ b/src/arch/power/insts/integer.hh
@@ -142,6 +142,39 @@


 /**
+ * Class for integer arithmetic operations.
+ */
+class IntArithOp : public IntOp
+{
+  protected:
+
+    /// Constructor
+    IntArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntOp(mnem, _machInst, __opClass)
+    {
+    }
+};
+
+
+/**
+ * Class for integer immediate arithmetic operations.
+ */
+class IntImmArithOp : public IntArithOp
+{
+  protected:
+
+    int32_t simm;
+
+    /// Constructor
+    IntImmArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntArithOp(mnem, _machInst, __opClass),
+        simm((int16_t)machInst.si)
+    {
+    }
+};
+
+
+/**
  * Class for integer operations with a shift.
  */
 class IntShiftOp : public IntOp
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index e27fd92..f9fe68a 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -172,26 +172,34 @@
     }

     format IntImmArithCheckRaOp {
-        14: addi({{ Rt = Ra + imm; }},
-                 {{ Rt = imm }});
-
-        15: addis({{ Rt = Ra + (imm << 16); }},
-                  {{ Rt = imm << 16; }});
+        14: addi({{ Rt = Ra + simm; }},
+                 {{ Rt = simm }});
+        15: addis({{ Rt = Ra + (simm << 16); }},
+                  {{ Rt = simm << 16; }});
     }

     format IntImmArithOp {
-        12: addic({{ uint32_t src = Ra; Rt = src + imm; }},
-                  [computeCA]);
+        12: addic({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true);

-        13: addic_({{ uint32_t src = Ra; Rt = src + imm; }},
-                   [computeCA, computeCR0]);
+        13: addic_({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true, true);

-        8: subfic({{ int32_t src = ~Ra; Rt = src + imm + 1; }},
-                  [computeCA]);
+        8: subfic({{
+            uint64_t src = ~Ra;
+            Rt = src + simm + 1;
+        }},
+        true);

         7: mulli({{
             int32_t src = Ra_sw;
-            int64_t prod = src * imm;
+            int64_t prod = src * simm;
             Rt = (uint32_t)prod;
         }});
     }
@@ -508,11 +516,11 @@
                 104: neg({{ ~Ra }}, {{ 1 }});
                 138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }},
                           true);
-                234: addme({{ Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                234: addme({{ Ra }}, {{ -1ULL }}, {{ xer.ca }},
                            true);
                 136: subfe({{ ~Ra }}, {{ Rb }}, {{ xer.ca }},
                            true);
-                232: subfme({{ ~Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                232: subfme({{ ~Ra }}, {{ -1ULL }}, {{ xer.ca }},
                             true);
                 202: addze({{ Ra }}, {{ xer.ca }},
                            computeCA = true);
@@ -522,21 +530,22 @@

             // Arithmetic instructions all use source registers Ra and Rb,
             // with destination register Rt.
-            format IntArithOp {
+            format IntArithCheckRcOp {
                 75: mulhw({{
                     int64_t prod = Ra_sd * Rb_sd;
                     Rt = prod >> 32;
                 }});
+
                 11: mulhwu({{
                     uint64_t prod = Ra_ud * Rb_ud;
                     Rt = prod >> 32;
                 }});
-                235: mullw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod; }});
-                747: mullwo({{
-                    int64_t src1 = Ra_sd;
-                    int64_t src2 = Rb;
-                    int64_t prod = src1 * src2;
-                    Rt = prod;
+
+                235: mullw({{
+                    int64_t prod = Ra_sd * Rb_sd; Rt = prod;
+                    if (prod != (int32_t)prod) {
+                        setOV = true;
+                    }
                 }},
                 true);

@@ -548,18 +557,7 @@
                         Rt = src1 / src2;
                     } else {
                         Rt = 0;
-                    }
-                }});
-
-                1003: divwo({{
-                    int32_t src1 = Ra_sw;
-                    int32_t src2 = Rb_sw;
-                    if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                        && src2 != 0) {
-                        Rt = src1 / src2;
-                    } else {
-                        Rt = 0;
-                        divSetOV = true;
+                        setOV = true;
                     }
                 }},
                 true);
@@ -571,18 +569,8 @@
                         Rt = src1 / src2;
                     } else {
                         Rt = 0;
+                        setOV = true;
                     }
-                }});
-
-                971: divwuo({{
-                  uint32_t src1 = Ra_sw;
-                  uint32_t src2 = Rb_sw;
-                  if (src2 != 0) {
-                      Rt = src1 / src2;
-                  } else {
-                      Rt = 0;
-                      divSetOV = true;
-                  }
                 }},
                 true);
             }
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa
index 50badce..01ea9ba 100644
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -98,17 +98,12 @@
     }
 '''

-computeDivOVCode = '''
-    if (divSetOV) {
+setOVCode = '''
+    if (setOV) {
         xer.ov = 1;
         xer.so = 1;
     } else {
-        if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
-            xer.ov = 1;
-            xer.so = 1;
-        } else {
-            xer.ov = 0;
-        }
+        xer.ov = 0;
     }
 '''

@@ -136,21 +131,23 @@
 // value in source register Ra, hence the use of src to hold the actual
 // value. The control flags include the use of code to compute the
 // carry bit or the CR0 code.
-def format IntImmArithOp(code, ctrl_flags = [], inst_flags = []) {{
+def format IntImmArithOp(code, computeCA = 0, computeCR0 = 0,
+                         inst_flags = []) {{

-    # Set up the dictionary and deal with control flags
-    dict = {'result':'Rt', 'inputa':'src', 'inputb':'imm'}
-    if ctrl_flags:
+    # Set up the dictionary
+    dict = {'result':'Rt', 'inputa':'src', 'inputb':'simm'}
+
+    # Deal with computing CR0 and carry
+    if computeCA or computeCR0:
         code += readXERCode
-        for val in ctrl_flags:
-            if val == 'computeCA':
-                code += computeCACode % dict + setXERCode
-            elif val == 'computeCR0':
-                code += computeCR0Code % dict
+    if computeCA:
+        code += computeCACode % dict + setXERCode
+    if computeCR0:
+        code += computeCR0Code % dict

     # Generate the class
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags, BasicDecode,
+ GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags, BasicDecode,
                  BasicConstructor)
 }};

@@ -163,12 +160,12 @@

     # First the version where Ra is non-zero
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags,
                  CheckRaDecode, BasicConstructor)

     # Now another version where Ra == 0
     (header_output_ra0, decoder_output_ra0, _, exec_output_ra0) = \
-        GenAluOp(name, Name + 'RaZero', 'IntImmOp', code_ra0, inst_flags,
+ GenAluOp(name, Name + 'RaZero', 'IntImmArithOp', code_ra0, inst_flags,
                  CheckRaDecode, BasicConstructor)

     # Finally, add to the other outputs
@@ -264,9 +261,9 @@
     dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}

     # Add code to set up variables and do the sum
-    code  = 'uint32_t src1 = ' + src1 + ';\n'
-    code += 'uint32_t src2 = ' + src2 + ';\n'
-    code += 'uint32_t ca = ' + ca + ';\n'
+    code  = 'uint64_t src1 = ' + src1 + ';\n'
+    code += 'uint64_t src2 = ' + src2 + ';\n'
+    code += 'uint64_t ca = ' + ca + ';\n'
     code += 'Rt = src1 + src2 + ca;\n'

     # Add code for calculating the carry, if needed
@@ -284,16 +281,16 @@

     # Generate the classes
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
                  CheckRcOeDecode, BasicConstructor)
     (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
+        GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
                  CheckRcOeDecode, IntRcConstructor)
     (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
-        GenAluOp(name, Name + 'OeSet', 'IntOp', code_oe1, inst_flags,
+        GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
                  CheckRcOeDecode, IntOeConstructor)
(header_output_rc1_oe1, decoder_output_rc1_oe1, _, exec_output_rc1_oe1) = \
-        GenAluOp(name, Name + 'RcSetOeSet', 'IntOp', code_rc1_oe1,
+        GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
                  inst_flags, CheckRcOeDecode, IntRcOeConstructor)

     # Finally, add to the other outputs
@@ -309,39 +306,69 @@

 // Instructions that use source registers Ra and Rb, with the result
 // placed into Rt. Basically multiply and divide instructions. The
-// carry bit is never set, but overflow can be calculated. Division
-// explicitly sets the overflow bit in certain situations and this is
-// dealt with using the 'divSetOV' boolean in decoder.isa. We generate
-// two versions of each instruction to deal with the Rc bit.
-def format IntArithOp(code, computeOV = 0, inst_flags = []) {{
+// carry bit is never set, but overflow can be calculated. In certain
+// situations, the overflow bits have to be set and this is dealt with
+// using the 'setOV' boolean in decoder.isa.
+//
+// In case overflow is to be calculated, we generate four versions of
+// each instruction to deal with different combinations of having the
+// OE bit set or unset and the Rc bit set or unset too. Otherwise, we
+// generate two versions of each instruction to deal with the Rc bit.
+def format IntArithCheckRcOp(code, computeOV = 0, inst_flags = []) {{

     # The result is always in Rt, but the source values vary
     dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}

     # Deal with setting the overflow flag
     if computeOV:
-        code = 'bool divSetOV = false;\n' + code
-        code += computeDivOVCode % dict + setXERCode
+        # Setup the 4 code versions and add code to access XER if necessary
+        code  = 'M5_VAR_USED bool setOV = false;\n' + code
+        code_rc1 = readXERCode + code + computeCR0Code % dict
+        code_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 += computeCR0Code % dict

-    # Setup the 2 code versions and add code to access XER if necessary
-    code_rc1 = readXERCode + code + computeCR0Code % dict
-    if computeOV:
-        code = readXERCode + code
+        # Generate the classes
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcOeDecode, BasicConstructor)
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+ GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcOeDecode, IntRcConstructor)
+        (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
+ GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
+                     CheckRcOeDecode, IntOeConstructor)
+        (header_output_rc1_oe1, decoder_output_rc1_oe1, _,
+         exec_output_rc1_oe1) = \
+            GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
+                     inst_flags, CheckRcOeDecode, IntRcOeConstructor)

-    # Generate the classes
-    (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
-                 CheckRcDecode, BasicConstructor)
+        # Finally, add to the other outputs
+        header_output += \
+            header_output_rc1 + header_output_oe1 + header_output_rc1_oe1
+        decoder_output += \
+ decoder_output_rc1 + decoder_output_oe1 + decoder_output_rc1_oe1
+        exec_output += \
+            exec_output_rc1 + exec_output_oe1 + exec_output_rc1_oe1

-    # Generate the second class
-    (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
-                 CheckRcDecode, IntRcConstructor)
+    else:
+        # Setup the 2 code versions and add code to access XER if necessary
+        code_rc1 = readXERCode + code + computeCR0Code % dict

-    # Finally, add to the other outputs
-    header_output += header_output_rc1
-    decoder_output += decoder_output_rc1
-    exec_output += exec_output_rc1
+        # Generate the first class
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcDecode, BasicConstructor)
+
+        # Generate the second class
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+ GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcDecode, IntRcConstructor)
+
+        # Finally, add to the other outputs
+        header_output += header_output_rc1
+        decoder_output += decoder_output_rc1
+        exec_output += exec_output_rc1
 }};



--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/40898
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I23d70ac4bad4d25d876308db0b3564c092bf574c
Gerrit-Change-Number: 40898
Gerrit-PatchSet: 1
Gerrit-Owner: Sandipan Das <sandi...@linux.ibm.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to