Giacomo Gabrielli has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/13521 )
Change subject: arch-arm: Add initial support for SVE gather/scatter loads/stores
......................................................................
arch-arm: Add initial support for SVE gather/scatter loads/stores
In addition to the code implementing decoding and execution of these
instructions, two supporting features are introduced:
1. a mechanism to skip the memory access part of a load/store
operation, used by gather loads to implement zeroing predication (a
standalone sketch of the resulting semantics follows this list);
2. the Arm decoder was augmented in a previous changeset to keep track
of the active SVE vector length; this is now used by gather/scatter
operations to determine the number of microops to generate (a check of
this arithmetic follows the list of changed files below).
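
A minimal, standalone C++ sketch of the semantics enabled by feature 1
(illustration only, not gem5 code; the hooks actually added by this
patch are ExecContext::setMemAccPredicate()/readMemAccPredicate(),
set in the generated initiateAcc() methods and checked in the O3 LSQ):

    // Toy model: inactive gather-load elements perform no memory access,
    // but their destination elements are still written back as zero.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<uint32_t> memory  = {10, 20, 30, 40, 50, 60, 70, 80};
        std::vector<uint32_t> offsets = { 1,  3,  5,  7,  0,  2,  4,  6};
        std::vector<bool>     pred    = {true, false, true, false,
                                         true, false, true, false};
        std::vector<uint32_t> zd(offsets.size());

        for (std::size_t i = 0; i < offsets.size(); ++i) {
            if (pred[i]) {
                zd[i] = memory[offsets[i]];  // active element: access memory
            } else {
                zd[i] = 0;                   // inactive: no access, zeroed
            }
        }

        for (uint32_t v : zd)
            std::printf("%u ", v);           // prints: 20 0 60 0 10 0 50 0
        std::printf("\n");
        return 0;
    }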
Change-Id: I891623015b47a39f61ed616f8896f32a7134c8e2
Signed-off-by: Giacomo Gabrielli <[email protected]>
---
M src/arch/arm/decoder.hh
A src/arch/arm/insts/sve_macromem.hh
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/includes.isa
M src/arch/arm/isa/insts/sve_mem.isa
M src/arch/arm/isa/operands.isa
M src/arch/arm/isa/templates/sve_mem.isa
M src/arch/arm/registers.hh
M src/arch/isa_parser.py
M src/cpu/base_dyn_inst.hh
M src/cpu/base_dyn_inst_impl.hh
M src/cpu/checker/cpu.hh
M src/cpu/exec_context.hh
M src/cpu/minor/exec_context.hh
M src/cpu/o3/lsq_unit_impl.hh
M src/cpu/simple/exec_context.hh
M src/cpu/simple_thread.hh
M src/cpu/thread_context.hh
18 files changed, 1,491 insertions(+), 107 deletions(-)
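
As an illustration of point 2 in the change description, the macroop
constructors added in sve_macromem.hh below derive the microop count
from the decoder-tracked vector length (machInst.sveLen), plus one
extra microop for the source-copy step of gather loads. The following
standalone snippet merely restates that arithmetic with made-up
parameters:

    // Per the formula used below, (sveLen + 1) is the vector length in
    // 128-bit granules, i.e. (sveLen + 1) * 16 bytes.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static int
    numMicroops(unsigned sveLen, std::size_t regElemSize, bool isLoad)
    {
        int num_elems = static_cast<int>(((sveLen + 1) * 16) / regElemSize);
        // Gather loads add one microop that copies the source vector
        // register used for address generation to an auxiliary register.
        return isLoad ? num_elems + 1 : num_elems;
    }

    int main()
    {
        // Example: 256-bit vectors (sveLen == 1), 32-bit register elements.
        std::printf("gather load:   %d microops\n",
                    numMicroops(1, sizeof(uint32_t), true));   // 9
        std::printf("scatter store: %d microops\n",
                    numMicroops(1, sizeof(uint32_t), false));  // 8
        return 0;
    }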
diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh
index f44a981..45b1e2b 100644
--- a/src/arch/arm/decoder.hh
+++ b/src/arch/arm/decoder.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
diff --git a/src/arch/arm/insts/sve_macromem.hh b/src/arch/arm/insts/sve_macromem.hh
new file mode 100644
index 0000000..7cad9a6
--- /dev/null
+++ b/src/arch/arm/insts/sve_macromem.hh
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2018 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Giacomo Gabrielli
+ */
+
+#ifndef __ARCH_ARM_SVE_MACROMEM_HH__
+#define __ARCH_ARM_SVE_MACROMEM_HH__
+
+#include "arch/arm/generated/decoder.hh"
+#include "arch/arm/insts/pred_inst.hh"
+
+namespace ArmISA {
+
+template <typename RegElemType, typename MemElemType,
+ template <typename, typename> class MicroopType>
+class SveIndexedMemVI : public PredMacroOp
+{
+ protected:
+ IntRegIndex dest;
+ IntRegIndex gp;
+ IntRegIndex base;
+ uint64_t imm;
+
+ public:
+    SveIndexedMemVI(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+ uint64_t _imm)
+ : PredMacroOp(mnem, machInst, __opClass),
+ dest(_dest), gp(_gp), base(_base), imm(_imm)
+ {
+ bool isLoad = (__opClass == MemReadOp);
+
+ int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
+
+ numMicroops = num_elems;
+ if (isLoad) {
+ numMicroops++;
+ }
+
+ microOps = new StaticInstPtr[numMicroops];
+
+ StaticInstPtr *uop = microOps;
+
+ if (isLoad) {
+ // The first microop of a gather load copies the source vector
+            // register used for address calculation to an auxiliary register,
+            // with all subsequent microops reading from the latter. This is
+ // needed to properly handle cases where the source vector
+ // register is the same as the destination register
+ *uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop(
+ mnem, machInst, _base, this);
+ uop++;
+ }
+
+ for (int i = 0; i < num_elems; i++, uop++) {
+ *uop = new MicroopType<RegElemType, MemElemType>(
+ mnem, machInst, __opClass, _dest, _gp,
+ isLoad ? (IntRegIndex) VECREG_UREG0 : _base, _imm, i,
+ num_elems);
+ }
+
+ --uop;
+ (*uop)->setLastMicroop();
+ microOps[0]->setFirstMicroop();
+
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+ (*uop)->setDelayedCommit();
+ }
+ }
+
+ Fault
+ execute(ExecContext *, Trace::InstRecord *) const
+ {
+ panic("Execute method called when it shouldn't!");
+ return NoFault;
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ // TODO: add suffix to transfer and base registers
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ ccprintf(ss, "{");
+ printVecReg(ss, dest, true);
+ ccprintf(ss, "}, ");
+ printPredReg(ss, gp);
+ ccprintf(ss, "/z, [");
+ printVecReg(ss, base, true);
+ if (imm != 0) {
+ ccprintf(ss, ", #%d", imm * sizeof(MemElemType));
+ }
+ ccprintf(ss, "]");
+ return ss.str();
+ }
+};
+
+template <typename RegElemType, typename MemElemType,
+ template <typename, typename> class MicroopType>
+class SveIndexedMemSV : public PredMacroOp
+{
+ protected:
+ IntRegIndex dest;
+ IntRegIndex gp;
+ IntRegIndex base;
+ IntRegIndex offset;
+
+ bool offsetIs32;
+ bool offsetIsSigned;
+ bool offsetIsScaled;
+
+ public:
+    SveIndexedMemSV(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
+ IntRegIndex _offset, bool _offsetIs32,
+ bool _offsetIsSigned, bool _offsetIsScaled)
+ : PredMacroOp(mnem, machInst, __opClass),
+ dest(_dest), gp(_gp), base(_base), offset(_offset),
+ offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned),
+ offsetIsScaled(_offsetIsScaled)
+ {
+ bool isLoad = (__opClass == MemReadOp);
+
+ int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
+
+ numMicroops = num_elems;
+ if (isLoad) {
+ numMicroops++;
+ }
+
+ microOps = new StaticInstPtr[numMicroops];
+
+ StaticInstPtr *uop = microOps;
+
+ if (isLoad) {
+ // The first microop of a gather load copies the source vector
+            // register used for address calculation to an auxiliary register,
+            // with all subsequent microops reading from the latter. This is
+ // needed to properly handle cases where the source vector
+ // register is the same as the destination register
+ *uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop(
+ mnem, machInst, _offset, this);
+ uop++;
+ }
+
+ for (int i = 0; i < num_elems; i++, uop++) {
+ *uop = new MicroopType<RegElemType, MemElemType>(
+ mnem, machInst, __opClass, _dest, _gp, _base,
+ isLoad ? (IntRegIndex) VECREG_UREG0 : _offset, _offsetIs32,
+ _offsetIsSigned, _offsetIsScaled, i, num_elems);
+ }
+
+ --uop;
+ (*uop)->setLastMicroop();
+ microOps[0]->setFirstMicroop();
+
+        for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
+ (*uop)->setDelayedCommit();
+ }
+ }
+
+ Fault
+ execute(ExecContext *, Trace::InstRecord *) const
+ {
+ panic("Execute method called when it shouldn't!");
+ return NoFault;
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ // TODO: add suffix to transfer and base registers
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ ccprintf(ss, "{");
+ printVecReg(ss, dest, true);
+ ccprintf(ss, "}, ");
+ printPredReg(ss, gp);
+ ccprintf(ss, "/z, [");
+ printIntReg(ss, base);
+ ccprintf(ss, ", ");
+ printVecReg(ss, offset, true);
+ ccprintf(ss, "]");
+ return ss.str();
+ }
+};
+
+} // namespace ArmISA
+
+#endif // __ARCH_ARM_SVE_MACROMEM_HH__
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index f845835..a0e450a 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -1,3 +1,4 @@
+// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -2802,34 +2803,153 @@
StaticInstPtr
decodeSveMemGather32(ExtMachInst machInst)
{
- // TODO: for now only LDR and LD1R are implemented
- if (bits(machInst, 22) && bits(machInst, 15)) {
- IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = bits(machInst, 21, 16);
-            IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
- uint8_t dtype = (bits(machInst, 24, 23) << 2) |
- bits(machInst, 14, 13);
- return decodeSveContigLoadSIInsts<SveLoadAndRepl>(
- dtype, machInst, zt, pg, rn, imm, false, true);
- } else if (bits(machInst, 24, 22) == 0x6 &&
- bits(machInst, 15, 13) == 0x0 &&
- bits(machInst, 4) == 0x0) {
- IntRegIndex pt = (IntRegIndex) (uint8_t) bits(machInst, 3, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
- bits(machInst, 12, 10));
- return new SveLdrPred(machInst, pt, rn, imm);
- } else if (bits(machInst, 24, 22) == 0x6 &&
- bits(machInst, 15, 13) == 0x2) {
- IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
- bits(machInst, 12, 10));
- return new SveLdrVec(machInst, zt, rn, imm);
+ if (bits(machInst, 15)) {
+ if (bits(machInst, 22)) {
+ // SVE load and broadcast element
+            IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+            IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 21, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 2) |
+ bits(machInst, 14, 13);
+ return decodeSveContigLoadSIInsts<SveLoadAndRepl>(
+ dtype, machInst, zt, pg, rn, imm, false, true);
+ } else {
+ if (bits(machInst, 21)) {
+ // SVE 32-bit gather load (vector plus immediate)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex zn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadVIInsts(
+ dtype, machInst, zt, pg, zn, imm, true, ff);
+ } else {
+ uint8_t b14_13 = bits(machInst, 14, 13);
+ if (b14_13 == 0x2 && bits(machInst, 4) == 0) {
+                    // TODO: SVE contiguous prefetch (scalar plus scalar)
+ return new Unknown64(machInst);
+ } else if (b14_13 == 0x3 && bits(machInst, 4) == 0) {
+ // TODO: SVE 32-bit gather prefetch (vector plus
+ // immediate)
+ return new Unknown64(machInst);
+ }
+ }
+ }
+ } else {
+ uint8_t b24_23 = bits(machInst, 24, 23);
+ if (b24_23 != 0x3 && bits(machInst, 21) == 0) {
+            // SVE 32-bit gather load (scalar plus 32-bit unscaled offsets)
+            IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+            IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ true, true, xs, false, ff);
+ }
+ switch (b24_23) {
+ case 0x0:
+ if (bits(machInst, 21) && bits(machInst, 4) == 0) {
+                    // TODO: SVE 32-bit gather prefetch (vector plus immediate)
+ break;
+ }
+ break;
+ case 0x1:
+ if (bits(machInst, 21)) {
+ // SVE 32-bit gather load halfwords (scalar plus 32-bit
+ // scaled offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ if (bits(machInst, 14)) {
+ return new SveIndexedMemSV<uint32_t, uint16_t,
+ SveGatherLoadSVMicroop>(
+ "ld1", machInst, MemReadOp, zt, pg, rn, zm,
+ true, xs, true);
+ } else {
+ return new SveIndexedMemSV<int32_t, int16_t,
+ SveGatherLoadSVMicroop>(
+ "ld1", machInst, MemReadOp, zt, pg, rn, zm,
+ true, xs, true);
+ }
+ }
+ break;
+ case 0x2:
+ if (bits(machInst, 21)) {
+                // SVE 32-bit gather load words (scalar plus 32-bit scaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return new SveIndexedMemSV<uint32_t, uint32_t,
+ SveGatherLoadSVMicroop>(
+ "ld1", machInst, MemReadOp, zt, pg, rn, zm,
+ true, xs, true);
+ }
+ break;
+ case 0x3:
+                if (bits(machInst, 22) == 0 && bits(machInst, 14, 13) == 0x0 &&
+ bits(machInst, 4) == 0) {
+ // SVE load predicate register
+ IntRegIndex pt = (IntRegIndex) (uint8_t)
+ bits(machInst, 3, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
+ bits(machInst, 12, 10));
+ return new SveLdrPred(machInst, pt, rn, imm);
+ } else if (bits(machInst, 22) == 0 &&
+ bits(machInst, 14, 13) == 0x2) {
+ // SVE load vector register
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
+ bits(machInst, 12, 10));
+ return new SveLdrVec(machInst, zt, rn, imm);
+ }
+ break;
+ }
}
return new Unknown64(machInst);
} // decodeSveMemGather32
@@ -2954,6 +3074,124 @@
StaticInstPtr
decodeSveMemGather64(ExtMachInst machInst)
{
+ switch ((bits(machInst, 21) << 1) | bits(machInst, 15)) {
+ case 0x0:
+ {
+        // SVE 64-bit gather load (scalar plus unpacked 32-bit unscaled
+        // offsets)
+        IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, true, xs, false, ff);
+ }
+ case 0x1:
+ if (bits(machInst, 22)) {
+            // SVE 64-bit gather load (scalar plus 64-bit unscaled offsets)
+            IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+            IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, false, false, false, ff);
+ } else {
+            if (bits(machInst, 14, 13) == 0x3 && bits(machInst, 4) == 0) {
+                // TODO: SVE 64-bit gather prefetch (vector plus immediate)
+ break;
+ }
+ }
+ break;
+ case 0x2:
+ if (bits(machInst, 24, 23) != 0x0) {
+            // SVE 64-bit gather load (scalar plus unpacked 32-bit scaled
+            // offsets)
+            IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+            IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t xs = bits(machInst, 22);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, true, xs, true, ff);
+ } else if (bits(machInst, 4) == 0) {
+ // TODO: SVE 64-bit gather prefetch (scalar plus unpacked
+ // 32-bit scaled offsets)
+ return new Unknown64(machInst);
+ }
+ break;
+ case 0x3:
+ if (bits(machInst, 22) == 0) {
+ // SVE 64-bit gather load (vector plus immediate)
+            IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+            IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadVIInsts(
+ dtype, machInst, zt, pg, zn, imm, false, ff);
+ } else {
+ if (bits(machInst, 24, 23) != 0x0) {
+ // SVE 64-bit gather load (scalar plus 64-bit scaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t dtype = (bits(machInst, 24, 23) << 1) |
+ bits(machInst, 14);
+ uint8_t ff = bits(machInst, 13);
+ if (ff) {
+ return new Unknown64(machInst);
+ }
+ return decodeSveGatherLoadSVInsts(
+ dtype, machInst, zt, pg, rn, zm,
+ false, false, false, true, ff);
+ } else if (bits(machInst, 4) == 0) {
+ // TODO: SVE 64-bit gather prefetch (scalar plus 64-bit
+ // scaled offsets)
+ break;
+ }
+ }
+ break;
+ }
return new Unknown64(machInst);
} // decodeSveMemGather64
@@ -2993,36 +3231,12 @@
} // decodeSveContigNTStoreSS
StaticInstPtr
- decodeSveScatterStore64SV32U(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV32U
-
- StaticInstPtr
- decodeSveScatterStore64SV64U(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV64U
-
- StaticInstPtr
decodeSveContigNTStoreSI(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveContigNTStoreSI
StaticInstPtr
- decodeSveScatterStore64VI(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64VI
-
- StaticInstPtr
- decodeSveScatterStore32SV32S(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore32SV32S
-
- StaticInstPtr
decodeSveStoreStructsSS(ExtMachInst machInst)
{
return new Unknown64(machInst);
@@ -3035,30 +3249,6 @@
} // decodeSveStoreStructsSI
StaticInstPtr
- decodeSveScatterStore32SV32U(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore32SV32U
-
- StaticInstPtr
- decodeSveScatterStore32VI(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore32VI
-
- StaticInstPtr
- decodeSveScatterStore64SV32S(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV32S
-
- StaticInstPtr
- decodeSveScatterStore64SV64S(ExtMachInst machInst)
- {
- return new Unknown64(machInst);
- } // decodeSveScatterStore64SV64S
-
- StaticInstPtr
decodeSveMemStore(ExtMachInst machInst)
{
switch (bits(machInst, 15, 13)) {
@@ -3092,37 +3282,118 @@
}
case 0x4:
case 0x6:
- switch (bits(machInst, 22, 21)) {
- case 0x0:
- return decodeSveScatterStore64SV32U(machInst);
- case 0x1:
- if (bits(machInst, 24, 23) != 0x0) {
- return decodeSveScatterStore64SV32S(machInst);
+ {
+            IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+            IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+ uint8_t xs = bits(machInst, 22);
+
+ switch (bits(machInst, 22, 21)) {
+ case 0x0:
+                // SVE 64-bit scatter store (scalar plus unpacked 32-bit
+ // unscaled offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, true, xs, false);
+ case 0x1:
+ if (bits(machInst, 24, 23) != 0x0) {
+ // SVE 64-bit scatter store (scalar plus unpacked
+ // 32-bit scaled offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, true, xs, true);
+ }
+ break;
+ case 0x2:
+ if (bits(machInst, 24, 23) != 0x3) {
+ // SVE 32-bit scatter store (scalar plus 32-bit
+ // unscaled offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ true, true, xs, false);
+ }
+ break;
+ case 0x3:
+ // SVE 32-bit scatter store (scalar plus 32-bit scaled
+ // offsets)
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ true, true, xs, true);
}
- break;
- case 0x2:
- if (bits(machInst, 24, 23) != 0x3) {
- return decodeSveScatterStore32SV32U(machInst);
- }
- break;
- case 0x3:
- return decodeSveScatterStore32SV32S(machInst);
}
break;
case 0x5:
switch (bits(machInst, 22, 21)) {
case 0x0:
- return decodeSveScatterStore64SV64U(machInst);
+ {
+                    // SVE 64-bit scatter store (scalar plus 64-bit unscaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, false, false, false);
+ }
case 0x1:
if (bits(machInst, 24, 23) != 0x0) {
- return decodeSveScatterStore64SV64S(machInst);
+ // SVE 64-bit scatter store (scalar plus 64-bit scaled
+ // offsets)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex rn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ IntRegIndex zm = (IntRegIndex) (uint8_t)
+ bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreSVInsts(
+ msz, machInst, zt, pg, rn, zm,
+ false, false, false, true);
}
break;
case 0x2:
- return decodeSveScatterStore64VI(machInst);
+ {
+ // SVE 64-bit scatter store (vector plus immediate)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex zn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreVIInsts(
+ msz, machInst, zt, pg, zn, imm, false);
+ }
case 0x3:
if (bits(machInst, 24, 23) != 0x3) {
- return decodeSveScatterStore64VI(machInst);
+ // SVE 32-bit scatter store (vector plus immediate)
+ IntRegIndex zt = (IntRegIndex) (uint8_t)
+ bits(machInst, 4, 0);
+ IntRegIndex zn = (IntRegIndex) (uint8_t)
+ bits(machInst, 9, 5);
+ uint64_t imm = bits(machInst, 20, 16);
+ IntRegIndex pg = (IntRegIndex) (uint8_t)
+ bits(machInst, 12, 10);
+ uint8_t msz = bits(machInst, 24, 23);
+
+ return decodeSveScatterStoreVIInsts(
+ msz, machInst, zt, pg, zn, imm, true);
}
break;
}
diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa
index 9aef8c6..f054bc8 100644
--- a/src/arch/arm/isa/includes.isa
+++ b/src/arch/arm/isa/includes.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010, 2012, 2017 ARM Limited
+// Copyright (c) 2010, 2012, 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -68,9 +68,10 @@
#include "arch/arm/insts/sve_mem.hh"
#include "arch/arm/insts/vfp.hh"
#include "arch/arm/isa_traits.hh"
+#include "enums/DecoderFlavour.hh"
#include "mem/packet.hh"
#include "sim/faults.hh"
-#include "enums/DecoderFlavour.hh"
+
}};
output decoder {{
@@ -80,11 +81,12 @@
#include "arch/arm/decoder.hh"
#include "arch/arm/faults.hh"
+#include "arch/arm/insts/sve_macromem.hh"
#include "arch/arm/intregs.hh"
#include "arch/arm/isa_traits.hh"
#include "arch/arm/utility.hh"
-#include "base/loader/symtab.hh"
#include "base/cprintf.hh"
+#include "base/loader/symtab.hh"
#include "cpu/thread_context.hh"
using namespace ArmISA;
@@ -102,8 +104,10 @@
#include "base/crc.hh"
#include "cpu/base.hh"
#include "sim/pseudo_inst.hh"
+
#if defined(linux)
#include <fenv.h>
+
#endif
#include "base/cp_annotate.hh"
diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa
index f4ca4c3..3102e80 100644
--- a/src/arch/arm/isa/insts/sve_mem.isa
+++ b/src/arch/arm/isa/insts/sve_mem.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017 ARM Limited
+// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -204,6 +204,288 @@
}};
+output decoder {{
+
+ StaticInstPtr
+ decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
+                               IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
+ uint64_t imm, bool esizeIs32,
+ bool firstFaulting)
+ {
+ const char* mn = firstFaulting ? "ldff1" : "ld1";
+ switch (dtype) {
+ case 0x0:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<int32_t, int8_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<int64_t, int8_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ }
+ case 0x1:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<uint32_t, uint8_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint8_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ }
+ case 0x2:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<int32_t, int16_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<int64_t, int16_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ }
+ case 0x3:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<uint32_t, uint16_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint16_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ }
+ case 0x4:
+ if (esizeIs32) {
+ break;
+ } else {
+ return new SveIndexedMemVI<int64_t, int32_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ }
+ case 0x5:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<uint32_t, uint32_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint32_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ }
+ case 0x7:
+ if (esizeIs32) {
+ break;
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint64_t,
+ SveGatherLoadVIMicroop>(
+ mn, machInst, MemReadOp, zt, pg, zn, imm);
+ }
+ }
+ return new Unknown64(machInst);
+ }
+
+ StaticInstPtr
+ decodeSveGatherLoadSVInsts(uint8_t dtype, ExtMachInst machInst,
+                               IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
+                               IntRegIndex zm, bool esizeIs32, bool offsetIs32,
+ bool offsetIsSigned, bool offsetIsScaled,
+ bool firstFaulting)
+ {
+ const char* mn = firstFaulting ? "ldff1" : "ld1";
+ switch (dtype) {
+ case 0x0:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<int32_t, int8_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<int64_t, int8_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x1:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<uint32_t, uint8_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint8_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x2:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<int32_t, int16_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<int64_t, int16_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x3:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<uint32_t, uint16_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint16_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x4:
+ if (esizeIs32) {
+ break;
+ } else {
+ return new SveIndexedMemSV<int64_t, int32_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x5:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<uint32_t, uint32_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint32_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x7:
+ if (esizeIs32) {
+ break;
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint64_t,
+ SveGatherLoadSVMicroop>(
+ mn, machInst, MemReadOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ }
+ return new Unknown64(machInst);
+ }
+
+ StaticInstPtr
+ decodeSveScatterStoreVIInsts(uint8_t msz, ExtMachInst machInst,
+ IntRegIndex zt, IntRegIndex pg,
+ IntRegIndex zn, uint64_t imm,
+ bool esizeIs32)
+ {
+ const char* mn = "st1";
+ switch (msz) {
+ case 0x0:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<uint32_t, uint8_t,
+ SveScatterStoreVIMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint8_t,
+ SveScatterStoreVIMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, zn, imm);
+ }
+ case 0x1:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<uint32_t, uint16_t,
+ SveScatterStoreVIMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint16_t,
+ SveScatterStoreVIMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, zn, imm);
+ }
+ case 0x2:
+ if (esizeIs32) {
+ return new SveIndexedMemVI<uint32_t, uint32_t,
+ SveScatterStoreVIMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, zn, imm);
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint32_t,
+ SveScatterStoreVIMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, zn, imm);
+ }
+ case 0x3:
+ if (esizeIs32) {
+ break;
+ } else {
+ return new SveIndexedMemVI<uint64_t, uint64_t,
+ SveScatterStoreVIMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, zn, imm);
+ }
+ }
+ return new Unknown64(machInst);
+ }
+
+ StaticInstPtr
+ decodeSveScatterStoreSVInsts(uint8_t msz, ExtMachInst machInst,
+ IntRegIndex zt, IntRegIndex pg,
+ IntRegIndex rn, IntRegIndex zm,
+ bool esizeIs32, bool offsetIs32,
+ bool offsetIsSigned, bool offsetIsScaled)
+ {
+ const char* mn = "st1";
+ switch (msz) {
+ case 0x0:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<uint32_t, uint8_t,
+ SveScatterStoreSVMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint8_t,
+ SveScatterStoreSVMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x1:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<uint32_t, uint16_t,
+ SveScatterStoreSVMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint16_t,
+ SveScatterStoreSVMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x2:
+ if (esizeIs32) {
+ return new SveIndexedMemSV<uint32_t, uint32_t,
+ SveScatterStoreSVMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint32_t,
+ SveScatterStoreSVMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ case 0x3:
+ if (esizeIs32) {
+ break;
+ } else {
+ return new SveIndexedMemSV<uint64_t, uint64_t,
+ SveScatterStoreSVMicroop>(
+ mn, machInst, MemWriteOp, zt, pg, rn, zm,
+ offsetIs32, offsetIsSigned, offsetIsScaled);
+ }
+ }
+ return new Unknown64(machInst);
+ }
+
+}};
+
+
let {{
header_output = ''
@@ -323,6 +605,31 @@
('uint64_t', 'uint64_t'),
)
+ gatherLoadTplArgs = (
+ ('int32_t', 'int8_t'),
+ ('int64_t', 'int8_t'),
+ ('uint32_t', 'uint8_t'),
+ ('uint64_t', 'uint8_t'),
+ ('int32_t', 'int16_t'),
+ ('int64_t', 'int16_t'),
+ ('uint32_t', 'uint16_t'),
+ ('uint64_t', 'uint16_t'),
+ ('int64_t', 'int32_t'),
+ ('uint32_t', 'uint32_t'),
+ ('uint64_t', 'uint32_t'),
+ ('uint64_t', 'uint64_t'),
+ )
+
+ scatterStoreTplArgs = (
+ ('uint32_t', 'uint8_t'),
+ ('uint64_t', 'uint8_t'),
+ ('uint32_t', 'uint16_t'),
+ ('uint64_t', 'uint16_t'),
+ ('uint32_t', 'uint32_t'),
+ ('uint64_t', 'uint32_t'),
+ ('uint64_t', 'uint64_t'),
+ )
+
# Generates definitions for SVE contiguous loads
def emitSveContigMemInsts(offsetIsImm):
global header_output, exec_output, decoders
@@ -437,9 +744,124 @@
'class_name': 'SveLoadAndRepl'}
exec_output += SveContigMemExecDeclare.subst(substDict)
+ class IndexedAddrForm:
+ VEC_PLUS_IMM = 0
+ SCA_PLUS_VEC = 1
+
+ # Generates definitions for the transfer microops of SVE indexed memory
+ # operations (gather loads, scatter stores)
+ def emitSveIndexedMemMicroops(indexed_addr_form):
+ assert indexed_addr_form in (IndexedAddrForm.VEC_PLUS_IMM,
+ IndexedAddrForm.SCA_PLUS_VEC)
+ global header_output, exec_output, decoders
+ tplHeader = 'template <class RegElemType, class MemElemType>'
+ tplArgs = '<RegElemType, MemElemType>'
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
+ eaCode = '''
+ EA = AA64FpBase_x[elemIndex] + imm * sizeof(MemElemType)'''
+ else:
+ eaCode = '''
+ uint64_t offset = AA64FpOffset_x[elemIndex];
+ if (offsetIs32) {
+ offset &= (1ULL << 32) - 1;
+ }
+ if (offsetIsSigned) {
+ offset = sext<32>(offset);
+ }
+ if (offsetIsScaled) {
+ offset *= sizeof(MemElemType);
+ }
+ EA = XBase + offset'''
+ loadMemAccCode = '''
+ if (GpOp_x[elemIndex]) {
+ AA64FpDest_x[elemIndex] = memData;
+ } else {
+ AA64FpDest_x[elemIndex] = 0;
+ }
+ '''
+ storeMemAccCode = '''
+ memData = AA64FpDest_x[elemIndex];
+ '''
+ predCheckCode = 'GpOp_x[elemIndex]'
+ loadIop = InstObjParams('ld1',
+ ('SveGatherLoadVIMicroop'
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveGatherLoadSVMicroop'),
+ 'MicroOp',
+ {'tpl_header': tplHeader,
+ 'tpl_args': tplArgs,
+ 'memacc_code': loadMemAccCode,
+ 'ea_code' : sveEnabledCheckCode + eaCode,
+ 'pred_check_code' : predCheckCode,
+ 'fa_code' : ''},
+ ['IsMicroop', 'IsMemRef', 'IsLoad'])
+ storeIop = InstObjParams('st1',
+ ('SveScatterStoreVIMicroop'
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveScatterStoreSVMicroop'),
+ 'MicroOp',
+ {'tpl_header': tplHeader,
+ 'tpl_args': tplArgs,
+ 'memacc_code': storeMemAccCode,
+ 'ea_code' : sveEnabledCheckCode + eaCode,
+ 'pred_check_code' : predCheckCode,
+ 'fa_code' : ''},
+ ['IsMicroop', 'IsMemRef', 'IsStore'])
+ if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
+ header_output += SveIndexedMemVIMicroopDeclare.subst(loadIop)
+ header_output += SveIndexedMemVIMicroopDeclare.subst(storeIop)
+ else:
+ header_output += SveIndexedMemSVMicroopDeclare.subst(loadIop)
+ header_output += SveIndexedMemSVMicroopDeclare.subst(storeIop)
+ exec_output += (
+ SveGatherLoadMicroopExecute.subst(loadIop) +
+ SveGatherLoadMicroopInitiateAcc.subst(loadIop) +
+ SveGatherLoadMicroopCompleteAcc.subst(loadIop) +
+ SveScatterStoreMicroopExecute.subst(storeIop) +
+ SveScatterStoreMicroopInitiateAcc.subst(storeIop) +
+ SveScatterStoreMicroopCompleteAcc.subst(storeIop))
+ for args in gatherLoadTplArgs:
+ substDict = {'tpl_args': '<%s>' % ', '.join(args),
+ 'class_name': (
+ 'SveGatherLoadVIMicroop'
+ if indexed_addr_form == \
+ IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveGatherLoadSVMicroop')}
+ # TODO: this should become SveMemExecDeclare
+ exec_output += SveContigMemExecDeclare.subst(substDict)
+ for args in scatterStoreTplArgs:
+ substDict = {'tpl_args': '<%s>' % ', '.join(args),
+ 'class_name': (
+ 'SveScatterStoreVIMicroop'
+ if indexed_addr_form == \
+ IndexedAddrForm.VEC_PLUS_IMM
+ else 'SveScatterStoreSVMicroop')}
+ # TODO: this should become SveMemExecDeclare
+ exec_output += SveContigMemExecDeclare.subst(substDict)
+
+    # Generates definitions for the first microop of SVE gather loads, required
+ # to propagate the source vector register to the transfer microops
+ def emitSveGatherLoadCpySrcVecMicroop():
+ global header_output, exec_output, decoders
+ code = sveEnabledCheckCode + '''
+ unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
+ xc->tcBase());
+ for (unsigned i = 0; i < eCount; i++) {
+ AA64FpUreg0_ub[i] = AA64FpOp1_ub[i];
+ }'''
+ iop = InstObjParams('ld1',
+ 'SveGatherLoadCpySrcVecMicroop',
+ 'MicroOp',
+ {'code': code},
+ ['IsMicroop'])
+ header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
+ exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)
+
# LD1[S]{B,H,W,D} (scalar plus immediate)
+ # ST1[S]{B,H,W,D} (scalar plus immediate)
emitSveContigMemInsts(True)
# LD1[S]{B,H,W,D} (scalar plus scalar)
+ # ST1[S]{B,H,W,D} (scalar plus scalar)
emitSveContigMemInsts(False)
# LD1R[S]{B,H,W,D}
@@ -450,4 +872,14 @@
# LDR (vector), STR (vector)
emitSveMemFillSpill(False)
+ # LD1[S]{B,H,W,D} (vector plus immediate)
+ # ST1[S]{B,H,W,D} (vector plus immediate)
+ emitSveIndexedMemMicroops(IndexedAddrForm.VEC_PLUS_IMM)
+ # LD1[S]{B,H,W,D} (scalar plus vector)
+ # ST1[S]{B,H,W,D} (scalar plus vector)
+ emitSveIndexedMemMicroops(IndexedAddrForm.SCA_PLUS_VEC)
+
+ # Source vector copy microop for gather loads
+ emitSveGatherLoadCpySrcVecMicroop()
+
}};
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index 037ea76..6c93599 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -1,5 +1,5 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2014, 2016 ARM Limited
+// Copyright (c) 2010-2014, 2016-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -527,6 +527,39 @@
'AA64FpDestMergeQ': vectorRegElem('0', 'tud', zeroing = True)
}),
+ 'AA64FpBase': vectorReg('base',
+ {
+ 'AA64FpBaseP0': vectorRegElem('0'),
+ 'AA64FpBaseP1': vectorRegElem('1'),
+ 'AA64FpBaseP2': vectorRegElem('2'),
+ 'AA64FpBaseP3': vectorRegElem('3'),
+ 'AA64FpBaseS': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64FpBaseD': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64FpBaseQ': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
+ 'AA64FpOffset': vectorReg('offset',
+ {
+ 'AA64FpOffsetP0': vectorRegElem('0'),
+ 'AA64FpOffsetP1': vectorRegElem('1'),
+ 'AA64FpOffsetP2': vectorRegElem('2'),
+ 'AA64FpOffsetP3': vectorRegElem('3'),
+ 'AA64FpOffsetS': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64FpOffsetD': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64FpOffsetQ': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
+ 'AA64FpUreg0': vectorReg('VECREG_UREG0',
+ {
+ 'AA64FpUreg0P0': vectorRegElem('0'),
+ 'AA64FpUreg0P1': vectorRegElem('1'),
+ 'AA64FpUreg0P2': vectorRegElem('2'),
+ 'AA64FpUreg0P3': vectorRegElem('3'),
+ 'AA64FpUreg0S': vectorRegElem('0', 'sf', zeroing = True),
+ 'AA64FpUreg0D': vectorRegElem('0', 'df', zeroing = True),
+ 'AA64FpUreg0Q': vectorRegElem('0', 'tud', zeroing = True)
+ }),
+
# Predicate register operands
'GpOp': predReg('gp'),
'POp1': predReg('op1'),
diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa
index 8471e44..c57225d 100644
--- a/src/arch/arm/isa/templates/sve_mem.isa
+++ b/src/arch/arm/isa/templates/sve_mem.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017 ARM Limited
+// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -384,3 +384,342 @@
}
}};
+def template SveIndexedMemVIMicroopDeclare {{
+ %(tpl_header)s
+ class %(class_name)s : public %(base_class)s
+ {
+ protected:
+ typedef RegElemType TPElem;
+
+ IntRegIndex dest;
+ IntRegIndex gp;
+ IntRegIndex base;
+ uint64_t imm;
+
+ int elemIndex;
+ int numElems;
+
+ unsigned memAccessFlags;
+
+ public:
+ %(class_name)s(const char* mnem, ExtMachInst machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp,
+            IntRegIndex _base, uint64_t _imm, int _elemIndex, int _numElems)
+ : %(base_class)s(mnem, machInst, %(op_class)s),
+ dest(_dest), gp(_gp), base(_base), imm(_imm),
+ elemIndex(_elemIndex), numElems(_numElems),
+ memAccessFlags(ArmISA::TLB::AllowUnaligned |
+ ArmISA::TLB::MustBeOne)
+ {
+ %(constructor)s;
+ if (_opClass == MemReadOp && elemIndex == 0) {
+ // The first micro-op is responsible for pinning the
+ // destination register
+ _destRegIdx[0].setNumPinnedWrites(numElems - 1);
+ }
+ }
+
+ Fault execute(ExecContext *, Trace::InstRecord *) const;
+ Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+ virtual void
+ annotateFault(ArmFault *fault)
+ {
+ %(fa_code)s
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ // TODO: add suffix to transfer register
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ ccprintf(ss, "{");
+ printVecReg(ss, dest, true);
+ ccprintf(ss, "}, ");
+ printPredReg(ss, gp);
+ if (_opClass == MemReadOp) {
+ ccprintf(ss, "/z");
+ }
+ ccprintf(ss, ", [");
+ printVecReg(ss, base, true);
+ if (imm != 0) {
+ ccprintf(ss, ", #%d", imm * sizeof(MemElemType));
+ }
+ ccprintf(ss, "] (uop elem %d tfer)", elemIndex);
+ return ss.str();
+ }
+ };
+}};
+
+def template SveIndexedMemSVMicroopDeclare {{
+ %(tpl_header)s
+ class %(class_name)s : public %(base_class)s
+ {
+ protected:
+ typedef RegElemType TPElem;
+
+ IntRegIndex dest;
+ IntRegIndex gp;
+ IntRegIndex base;
+ IntRegIndex offset;
+
+ bool offsetIs32;
+ bool offsetIsSigned;
+ bool offsetIsScaled;
+
+ int elemIndex;
+ int numElems;
+
+ unsigned memAccessFlags;
+
+ public:
+ %(class_name)s(const char* mnem, ExtMachInst machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp,
+ IntRegIndex _base, IntRegIndex _offset, bool _offsetIs32,
+ bool _offsetIsSigned, bool _offsetIsScaled, int _elemIndex,
+ int _numElems)
+ : %(base_class)s(mnem, machInst, %(op_class)s),
+ dest(_dest), gp(_gp), base(_base), offset(_offset),
+ offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned),
+ offsetIsScaled(_offsetIsScaled), elemIndex(_elemIndex),
+ numElems(_numElems),
+ memAccessFlags(ArmISA::TLB::AllowUnaligned |
+ ArmISA::TLB::MustBeOne)
+ {
+ %(constructor)s;
+ if (_opClass == MemReadOp && elemIndex == 0) {
+ // The first micro-op is responsible for pinning the
+ // destination register
+ _destRegIdx[0].setNumPinnedWrites(numElems - 1);
+ }
+ }
+
+ Fault execute(ExecContext *, Trace::InstRecord *) const;
+ Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
+        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
+
+ virtual void
+ annotateFault(ArmFault *fault)
+ {
+ %(fa_code)s
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ // TODO: add suffix to transfer and base registers
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ ccprintf(ss, "{");
+ printVecReg(ss, dest, true);
+ ccprintf(ss, "}, ");
+ printPredReg(ss, gp);
+ if (_opClass == MemReadOp) {
+ ccprintf(ss, "/z");
+ }
+ ccprintf(ss, ", [");
+ printIntReg(ss, base);
+ ccprintf(ss, ", ");
+ printVecReg(ss, offset, true);
+ ccprintf(ss, "] (uop elem %d tfer)", elemIndex);
+ return ss.str();
+ }
+ };
+}};
+
+def template SveGatherLoadMicroopExecute {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ MemElemType memData;
+
+ if (%(pred_check_code)s) {
+ fault = readMemAtomic(xc, traceData, EA, memData,
+ this->memAccessFlags);
+ }
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+def template SveGatherLoadMicroopInitiateAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_src_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ MemElemType memData;
+
+ if (%(pred_check_code)s) {
+ fault = initiateMemRead(xc, traceData, EA, memData,
+ this->memAccessFlags);
+ } else {
+ xc->setMemAccPredicate(false);
+ }
+
+ return fault;
+ }
+}};
+
+def template SveGatherLoadMicroopCompleteAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt,
+ ExecContext *xc, Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+
+ MemElemType memData = 0;
+ if (%(pred_check_code)s) {
+ getMem(pkt, memData, traceData);
+ }
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+def template SveScatterStoreMicroopExecute {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ MemElemType memData;
+ %(memacc_code)s;
+
+ if (%(pred_check_code)s) {
+ fault = writeMemAtomic(xc, traceData, memData, EA,
+ this->memAccessFlags, NULL);
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+def template SveScatterStoreMicroopInitiateAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ bool aarch64 M5_VAR_USED = true;
+
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ MemElemType memData;
+ %(memacc_code)s;
+
+ if (%(pred_check_code)s) {
+ fault = writeMemTiming(xc, traceData, memData, EA,
+ this->memAccessFlags, NULL);
+ } else {
+ xc->setPredicate(false);
+ }
+
+ return fault;
+ }
+}};
+
+def template SveScatterStoreMicroopCompleteAcc {{
+ %(tpl_header)s
+ Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt,
+ ExecContext *xc, Trace::InstRecord *traceData) const
+ {
+ return NoFault;
+ }
+}};
+
+def template SveGatherLoadCpySrcVecMicroopDeclare {{
+ class SveGatherLoadCpySrcVecMicroop : public MicroOp
+ {
+ protected:
+ IntRegIndex op1;
+
+ StaticInst *macroOp;
+
+ public:
+        SveGatherLoadCpySrcVecMicroop(const char* mnem, ExtMachInst machInst,
+            IntRegIndex _op1, StaticInst *_macroOp)
+            : MicroOp(mnem, machInst, SimdAluOp), op1(_op1), macroOp(_macroOp)
+ {
+ %(constructor)s;
+ }
+
+ Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ std::stringstream ss;
+ ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
+ ccprintf(ss, " (uop src vec cpy)");
+ return ss.str();
+ }
+ };
+}};
+
+def template SveGatherLoadCpySrcVecMicroopExecute {{
+ Fault SveGatherLoadCpySrcVecMicroop::execute(ExecContext *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+ %(op_decl)s;
+ %(op_rd)s;
+
+ %(code)s;
+ if (fault == NoFault)
+ {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh
index e96020f..c509605 100644
--- a/src/arch/arm/registers.hh
+++ b/src/arch/arm/registers.hh
@@ -98,6 +98,7 @@
const int NumIntRegs = NUM_INTREGS;
const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
+const int VECREG_UREG0 = 32;
const int NumCCRegs = NUM_CCREGS;
const int NumPredRegs = 17; // P0-P15, FFR
const int PREDREG_FFR = 16;
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index b4d0240..4b80578 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2014, 2016 ARM Limited
+# Copyright (c) 2014, 2016, 2018 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
@@ -1641,6 +1641,9 @@
# decoder header - everything depends on this
file = 'decoder.hh'
with self.open(file) as f:
+ f.write('#ifndef __ARCH_%(isa)s_GENERATED_DECODER_HH__\n'
+ '#define __ARCH_%(isa)s_GENERATED_DECODER_HH__\n\n' %
+ {'isa': self.isa_name.upper()})
fn = 'decoder-g.hh.inc'
assert(fn in self.files)
f.write('#include "%s"\n' % fn)
@@ -1649,6 +1652,8 @@
assert(fn in self.files)
f.write('namespace %s {\n#include "%s"\n}\n'
% (self.namespace, fn))
+ f.write('\n#endif // __ARCH_%s_GENERATED_DECODER_HH__\n' %
+ self.isa_name.upper())
# decoder method - cannot be split
file = 'decoder.cc'
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 7bc88ce..acb8824 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -136,6 +136,7 @@
EffAddrValid,
RecordResult,
Predicate,
+ MemAccPredicate,
PredTaken,
IsStrictlyOrdered,
ReqMade,
@@ -851,6 +852,16 @@
}
}
+ bool readMemAccPredicate() const
+ {
+ return instFlags[MemAccPredicate];
+ }
+
+ void setMemAccPredicate(bool val)
+ {
+ instFlags[MemAccPredicate] = val;
+ }
+
/** Sets the ASID. */
void setASID(short addr_space_id) { asid = addr_space_id; }
short getASID() { return asid; }
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index b1ed03f..3fda97e 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011 ARM Limited
+ * Copyright (c) 2011, 2018 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -103,6 +103,7 @@
instFlags.reset();
instFlags[RecordResult] = true;
instFlags[Predicate] = true;
+ instFlags[MemAccPredicate] = true;
lqIdx = -1;
sqIdx = -1;
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index d18305e..4a40f38 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2016-2017 ARM Limited
+ * Copyright (c) 2011, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -436,6 +436,15 @@
thread->setPredicate(val);
}
+ bool readMemAccPredicate() const override
+ {
+ return thread->readMemAccPredicate();
+ }
+ void setMemAccPredicate(bool val) override
+ {
+ thread->setMemAccPredicate(val);
+ }
+
TheISA::PCState pcState() const override { return thread->pcState(); }
void pcState(const TheISA::PCState &val) override
{
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index 400d9ec..d585096 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2016-2017 ARM Limited
+ * Copyright (c) 2014, 2016-2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -327,6 +327,8 @@
virtual bool readPredicate() const = 0;
virtual void setPredicate(bool val) = 0;
+ virtual bool readMemAccPredicate() const = 0;
+ virtual void setMemAccPredicate(bool val) = 0;
/** @} */
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index df66b11..a151c6a 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014, 2016-2017 ARM Limited
+ * Copyright (c) 2011-2014, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -331,6 +331,18 @@
thread.setPredicate(val);
}
+ bool
+ readMemAccPredicate() const override
+ {
+ return thread.readMemAccPredicate();
+ }
+
+ void
+ setMemAccPredicate(bool val) override
+ {
+ thread.setMemAccPredicate(val);
+ }
+
TheISA::PCState
pcState() const override
{
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 056297e..9705667 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -539,6 +539,16 @@
load_fault = inst->initiateAcc();
+ if (!inst->readMemAccPredicate()) {
+ assert(load_fault == NoFault);
+ assert(inst->readPredicate());
+ inst->setExecuted();
+ inst->completeAcc(nullptr);
+ iewStage->instToCommit(inst);
+ iewStage->activityThisCycle();
+ return NoFault;
+ }
+
if (inst->isTranslationDelayed() && load_fault == NoFault)
return load_fault;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index e473d13..0fc7cd6 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014-2017 ARM Limited
+ * Copyright (c) 2014-2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -530,6 +530,16 @@
}
}
+ bool readMemAccPredicate() const override
+ {
+ return thread->readMemAccPredicate();
+ }
+
+ void setMemAccPredicate(bool val) override
+ {
+ thread->setMemAccPredicate(val);
+ }
+
/**
* Invalidate a page in the DTLB <i>and</i> ITLB.
*/
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index c7c7ee7..d27a88c 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -128,6 +128,10 @@
/** Did this instruction execute or is it predicated false */
bool predicate;
+ /** True if the memory access should be skipped for this instruction
+ * Write back is still performed through completeAcc() */
+ bool memAccPredicate;
+
public:
std::string name() const
{
@@ -532,6 +536,16 @@
predicate = val;
}
+ bool readMemAccPredicate()
+ {
+ return memAccPredicate;
+ }
+
+ void setMemAccPredicate(bool val)
+ {
+ memAccPredicate = val;
+ }
+
MiscReg
readMiscRegNoEffect(int misc_reg, ThreadID tid = 0) const
{
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index abd5df9..80fbb8b 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -59,6 +59,7 @@
{
class ISA;
class Decoder;
+ class ISA;
}
class BaseCPU;
class BaseTLB;
@@ -577,6 +578,11 @@
void setPredicate(bool val)
{ actualTC->setPredicate(val); }
+ bool readMemAccPredicate() { return actualTC->readMemAccPredicate(); }
+
+ void setMemAccPredicate(bool val)
+ { actualTC->setMemAccPredicate(val); }
+
MiscReg readMiscRegNoEffect(int misc_reg) const
{ return actualTC->readMiscRegNoEffect(misc_reg); }
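
For reference, the per-element effective-address computation emitted
for the scalar-plus-vector microops (the eaCode string in sve_mem.isa
above) reduces to the standalone restatement below; the helper name
and input values are made up for illustration:

    // Mask the offset element to 32 bits, optionally sign-extend it,
    // optionally scale it by the memory element size, then add the
    // scalar base register.
    #include <cstdint>
    #include <cstdio>

    template <class MemElemType>
    static uint64_t
    gatherScatterEA(uint64_t base, uint64_t rawOffset, bool offsetIs32,
                    bool offsetIsSigned, bool offsetIsScaled)
    {
        uint64_t offset = rawOffset;
        if (offsetIs32)
            offset &= (1ULL << 32) - 1;
        if (offsetIsSigned && (offset & (1ULL << 31)))
            offset |= ~((1ULL << 32) - 1);    // sext<32>
        if (offsetIsScaled)
            offset *= sizeof(MemElemType);
        return base + offset;
    }

    int main()
    {
        // 32-bit signed, scaled word offset of -4 against base 0x1000:
        uint64_t ea = gatherScatterEA<uint32_t>(0x1000, 0xfffffffcULL,
                                                true, true, true);
        std::printf("0x%llx\n", (unsigned long long)ea);  // prints 0xff0
        return 0;
    }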
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/13521
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I891623015b47a39f61ed616f8896f32a7134c8e2
Gerrit-Change-Number: 13521
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Gabrielli <[email protected]>
Gerrit-MessageType: newchange