changeset a39de7b8d2c9 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=a39de7b8d2c9
description:
x86: Rework opcode parsing to support 3 byte opcodes properly.
Instead of counting the number of opcode bytes in an instruction and recording
each byte before the actual opcode, we can represent the path we took to get to
the actual opcode byte by using a type code. That has a couple of advantages.
First, we can disambiguate opcodes of the same length which have different
properties. Second, it reduces the amount of data stored in an ExtMachInst,
making them slightly easier/faster to create and process. This also adds some
flexibility as far as how different types of opcodes are handled, which might
come in handy if we decide to support VEX or XOP instructions.
This change also adds tables to support properly decoding 3 byte opcodes.
Previously, we would fall off the end of some arrays, on top of the ambiguity
described above.
This change doesn't measurably affect performance on the twolf benchmark.
diffstat:
src/arch/x86/decoder.cc | 193 +-
src/arch/x86/decoder.hh | 29 +-
src/arch/x86/decoder_tables.cc | 259 +-
src/arch/x86/isa/bitfields.isa | 4 +-
src/arch/x86/isa/decoder/decoder.isa | 10 +-
src/arch/x86/isa/decoder/locked_opcodes.isa | 62 +-
src/arch/x86/isa/decoder/one_byte_opcodes.isa | 2 +-
src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa | 110 +
src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa | 65 +
src/arch/x86/isa/decoder/three_byte_opcodes.isa | 151 -
src/arch/x86/isa/decoder/two_byte_opcodes.isa | 1926 ++++++++---------
src/arch/x86/isa_traits.hh | 2 +-
src/arch/x86/types.cc | 9 +-
src/arch/x86/types.hh | 62 +-
14 files changed, 1531 insertions(+), 1353 deletions(-)
diffs (truncated from 3201 to 300 lines):
diff -r 7734249c92b9 -r a39de7b8d2c9 src/arch/x86/decoder.cc
--- a/src/arch/x86/decoder.cc Thu Dec 04 15:52:48 2014 -0800
+++ b/src/arch/x86/decoder.cc Thu Dec 04 15:53:54 2014 -0800
@@ -48,9 +48,8 @@
emi.rex = 0;
emi.legacy = 0;
- emi.opcode.num = 0;
+ emi.opcode.type = BadOpcode;
emi.opcode.op = 0;
- emi.opcode.prefixA = emi.opcode.prefixB = 0;
immediateCollected = 0;
emi.immediate = 0;
@@ -94,8 +93,17 @@
case PrefixState:
state = doPrefixState(nextByte);
break;
- case OpcodeState:
- state = doOpcodeState(nextByte);
+ case OneByteOpcodeState:
+ state = doOneByteOpcodeState(nextByte);
+ break;
+ case TwoByteOpcodeState:
+ state = doTwoByteOpcodeState(nextByte);
+ break;
+ case ThreeByte0F38OpcodeState:
+ state = doThreeByte0F38OpcodeState(nextByte);
+ break;
+ case ThreeByte0F3AOpcodeState:
+ state = doThreeByte0F3AOpcodeState(nextByte);
break;
case ModRMState:
state = doModRMState(nextByte);
@@ -199,7 +207,7 @@
emi.rex = nextByte;
break;
case 0:
- nextState = OpcodeState;
+ nextState = OneByteOpcodeState;
break;
default:
panic("Unrecognized prefix %#x\n", nextByte);
@@ -207,79 +215,132 @@
return nextState;
}
-//Load all the opcodes (currently up to 2) and then figure out
-//what immediate and/or ModRM is needed.
+// Load the first opcode byte. Determine if there are more opcode bytes, and
+// if not, what immediate and/or ModRM is needed.
Decoder::State
-Decoder::doOpcodeState(uint8_t nextByte)
+Decoder::doOneByteOpcodeState(uint8_t nextByte)
{
State nextState = ErrorState;
- emi.opcode.num++;
- //We can't handle 3+ byte opcodes right now
- assert(emi.opcode.num < 4);
consumeByte();
- if(emi.opcode.num == 1 && nextByte == 0x0f)
- {
- nextState = OpcodeState;
- DPRINTF(Decoder, "Found two byte opcode.\n");
- emi.opcode.prefixA = nextByte;
- }
- else if(emi.opcode.num == 2 && (nextByte == 0x38 || nextByte == 0x3A))
- {
- nextState = OpcodeState;
- DPRINTF(Decoder, "Found three byte opcode.\n");
- emi.opcode.prefixB = nextByte;
- }
- else
- {
- DPRINTF(Decoder, "Found opcode %#x.\n", nextByte);
+ if (nextByte == 0x0f) {
+ nextState = TwoByteOpcodeState;
+ DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
+ } else {
+ DPRINTF(Decoder, "Found one byte opcode %#x.\n", nextByte);
+ emi.opcode.type = OneByteOpcode;
emi.opcode.op = nextByte;
- //Figure out the effective operand size. This can be overriden to
- //a fixed value at the decoder level.
- int logOpSize;
- if (emi.rex.w)
- logOpSize = 3; // 64 bit operand size
- else if (emi.legacy.op)
- logOpSize = altOp;
- else
- logOpSize = defOp;
+ nextState = processOpcode(ImmediateTypeOneByte, UsesModRMOneByte,
+ nextByte >= 0xA0 && nextByte <= 0xA3);
+ }
+ return nextState;
+}
- //Set the actual op size
- emi.opSize = 1 << logOpSize;
+// Load the second opcode byte. Determine if there are more opcode bytes, and
+// if not, what immediate and/or ModRM is needed.
+Decoder::State
+Decoder::doTwoByteOpcodeState(uint8_t nextByte)
+{
+ State nextState = ErrorState;
+ consumeByte();
+ if (nextByte == 0x38) {
+ nextState = ThreeByte0F38OpcodeState;
+ DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
+ } else if (nextByte == 0x3a) {
+ nextState = ThreeByte0F3AOpcodeState;
+ DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
+ } else {
+ DPRINTF(Decoder, "Found two byte opcode %#x.\n", nextByte);
+ emi.opcode.type = TwoByteOpcode;
+ emi.opcode.op = nextByte;
- //Figure out the effective address size. This can be overriden to
- //a fixed value at the decoder level.
- int logAddrSize;
- if(emi.legacy.addr)
- logAddrSize = altAddr;
- else
- logAddrSize = defAddr;
+ nextState = processOpcode(ImmediateTypeTwoByte, UsesModRMTwoByte);
+ }
+ return nextState;
+}
- //Set the actual address size
- emi.addrSize = 1 << logAddrSize;
+// Load the third opcode byte and determine what immediate and/or ModRM is
+// needed.
+Decoder::State
+Decoder::doThreeByte0F38OpcodeState(uint8_t nextByte)
+{
+ consumeByte();
- //Figure out the effective stack width. This can be overriden to
- //a fixed value at the decoder level.
- emi.stackSize = 1 << stack;
+ DPRINTF(Decoder, "Found three byte 0F38 opcode %#x.\n", nextByte);
+ emi.opcode.type = ThreeByte0F38Opcode;
+ emi.opcode.op = nextByte;
- //Figure out how big of an immediate we'll retreive based
- //on the opcode.
- int immType = ImmediateType[emi.opcode.num - 1][nextByte];
- if (emi.opcode.num == 1 && nextByte >= 0xA0 && nextByte <= 0xA3)
- immediateSize = SizeTypeToSize[logAddrSize - 1][immType];
- else
- immediateSize = SizeTypeToSize[logOpSize - 1][immType];
+ return processOpcode(ImmediateTypeThreeByte0F38, UsesModRMThreeByte0F38);
+}
- //Determine what to expect next
- if (UsesModRM[emi.opcode.num - 1][nextByte]) {
- nextState = ModRMState;
+// Load the third opcode byte and determine what immediate and/or ModRM is
+// needed.
+Decoder::State
+Decoder::doThreeByte0F3AOpcodeState(uint8_t nextByte)
+{
+ consumeByte();
+
+ DPRINTF(Decoder, "Found three byte 0F3A opcode %#x.\n", nextByte);
+ emi.opcode.type = ThreeByte0F3AOpcode;
+ emi.opcode.op = nextByte;
+
+ return processOpcode(ImmediateTypeThreeByte0F3A, UsesModRMThreeByte0F3A);
+}
+
+// Generic opcode processing which determines the immediate size, and whether
+// or not there's a modrm byte.
+Decoder::State
+Decoder::processOpcode(ByteTable &immTable, ByteTable &modrmTable,
+ bool addrSizedImm)
+{
+ State nextState = ErrorState;
+ const uint8_t opcode = emi.opcode.op;
+
+ //Figure out the effective operand size. This can be overriden to
+ //a fixed value at the decoder level.
+ int logOpSize;
+ if (emi.rex.w)
+ logOpSize = 3; // 64 bit operand size
+ else if (emi.legacy.op)
+ logOpSize = altOp;
+ else
+ logOpSize = defOp;
+
+ //Set the actual op size
+ emi.opSize = 1 << logOpSize;
+
+ //Figure out the effective address size. This can be overriden to
+ //a fixed value at the decoder level.
+ int logAddrSize;
+ if(emi.legacy.addr)
+ logAddrSize = altAddr;
+ else
+ logAddrSize = defAddr;
+
+ //Set the actual address size
+ emi.addrSize = 1 << logAddrSize;
+
+ //Figure out the effective stack width. This can be overriden to
+ //a fixed value at the decoder level.
+ emi.stackSize = 1 << stack;
+
+ //Figure out how big of an immediate we'll retreive based
+ //on the opcode.
+ int immType = immTable[opcode];
+ if (addrSizedImm)
+ immediateSize = SizeTypeToSize[logAddrSize - 1][immType];
+ else
+ immediateSize = SizeTypeToSize[logOpSize - 1][immType];
+
+ //Determine what to expect next
+ if (modrmTable[opcode]) {
+ nextState = ModRMState;
+ } else {
+ if(immediateSize) {
+ nextState = ImmediateState;
} else {
- if(immediateSize) {
- nextState = ImmediateState;
- } else {
- instDone = true;
- nextState = ResetState;
- }
+ instDone = true;
+ nextState = ResetState;
}
}
return nextState;
@@ -315,7 +376,7 @@
// The "test" instruction in group 3 needs an immediate, even though
// the other instructions with the same actual opcode don't.
- if (emi.opcode.num == 1 && (modRM.reg & 0x6) == 0) {
+ if (emi.opcode.type == OneByteOpcode && (modRM.reg & 0x6) == 0) {
if (emi.opcode.op == 0xF6)
immediateSize = 1;
else if (emi.opcode.op == 0xF7)
diff -r 7734249c92b9 -r a39de7b8d2c9 src/arch/x86/decoder.hh
--- a/src/arch/x86/decoder.hh Thu Dec 04 15:52:48 2014 -0800
+++ b/src/arch/x86/decoder.hh Thu Dec 04 15:53:54 2014 -0800
@@ -51,10 +51,19 @@
{
private:
//These are defined and documented in decoder_tables.cc
- static const uint8_t Prefixes[256];
- static const uint8_t UsesModRM[2][256];
- static const uint8_t ImmediateType[2][256];
static const uint8_t SizeTypeToSize[3][10];
+ typedef const uint8_t ByteTable[256];
+ static ByteTable Prefixes;
+
+ static ByteTable UsesModRMOneByte;
+ static ByteTable UsesModRMTwoByte;
+ static ByteTable UsesModRMThreeByte0F38;
+ static ByteTable UsesModRMThreeByte0F3A;
+
+ static ByteTable ImmediateTypeOneByte;
+ static ByteTable ImmediateTypeTwoByte;
+ static ByteTable ImmediateTypeThreeByte0F38;
+ static ByteTable ImmediateTypeThreeByte0F3A;
protected:
struct InstBytes
@@ -166,7 +175,10 @@
ResetState,
FromCacheState,
PrefixState,
- OpcodeState,
+ OneByteOpcodeState,
+ TwoByteOpcodeState,
+ ThreeByte0F38OpcodeState,
+ ThreeByte0F3AOpcodeState,
ModRMState,
SIBState,
DisplacementState,
@@ -181,12 +193,19 @@
State doResetState();
State doFromCacheState();
State doPrefixState(uint8_t);
- State doOpcodeState(uint8_t);
+ State doOneByteOpcodeState(uint8_t);
+ State doTwoByteOpcodeState(uint8_t);
+ State doThreeByte0F38OpcodeState(uint8_t);
+ State doThreeByte0F3AOpcodeState(uint8_t);
State doModRMState(uint8_t);
State doSIBState(uint8_t);
State doDisplacementState();
State doImmediateState();
+ //Process the actual opcode found earlier, using the supplied tables.
+ State processOpcode(ByteTable &immTable, ByteTable &modrmTable,
+ bool addrSizedImm = false);
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev