Tong Shen has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/39495 )
Change subject: arch-x86: implement PSHUFB SSE instruction.
......................................................................
arch-x86: implement PSHUFB SSE instruction.
Change-Id: I9398f9ecb26b6aabf4015e0e285fdc2f4c2487dd
---
M src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
M src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
M src/arch/x86/isa/microops/mediaop.isa
3 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
index 3165eb7..0f4330b 100644
--- a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
+++ b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
@@ -31,7 +31,7 @@
'X86ISA::ThreeByte0F38Opcode': decode LEGACY_OP {
format WarnUnimpl {
1: decode OPCODE_OP {
- 0x00: pshufb_Vdq_Wdq();
+ 0x00: Inst::PSHUFB(Vo, Wo);
0x01: phaddw_Vdq_Wdq();
0x02: phaddd_Vdq_Wdq();
0x03: phaddsw_Vdq_Wdq();
diff --git
a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
index 6651d87..7beb2dd 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
@@ -84,4 +84,32 @@
ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
shuffle xmml, ufp1, ufp1, size=2, ext=imm
};
-'''
+
+# PSHUFB, register/register form.
+# ufp1/ufp2 receive copies of xmmlm/xmmhm and are then passed to shuffleb as
+# its destination operand, which shuffleb reads as the per-byte selector
+# before overwriting it with the shuffled bytes taken from xmml/xmmh.
+# NOTE(review): xmmlm/xmmhm are presumably the low/high halves of the r/m
+# (selector) register and xmml/xmmh those of the reg (data) register --
+# confirm against the operand naming used by the rest of this file.
+def macroop PSHUFB_XMM_XMM {
+ movfp ufp1, xmmlm, dataSize=8
+ movfp ufp2, xmmhm, dataSize=8
+ # Both shuffles read the still-unmodified xmml/xmmh pair; the results are
+ # staged in ufp1/ufp2 so the first shuffle cannot clobber data that the
+ # second one still needs.
+ shuffleb ufp1, xmml, xmmh
+ shuffleb ufp2, xmml, xmmh
+ movfp xmml, ufp1, dataSize=8
+ movfp xmmh, ufp2, dataSize=8
+};
+
+# PSHUFB, register/memory form (sib addressing).
+# The 16-byte selector is read from memory as two 8-byte loads, at
+# DISPLACEMENT and DISPLACEMENT + 8, into ufp1 and ufp2. Each is then passed
+# to shuffleb as its destination operand, which shuffleb reads as the
+# per-byte selector before overwriting it with bytes taken from xmml/xmmh.
+def macroop PSHUFB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ # Both shuffles read the still-unmodified xmml/xmmh pair; the results are
+ # staged in ufp1/ufp2 and only written back afterwards.
+ shuffleb ufp1, xmml, xmmh
+ shuffleb ufp2, xmml, xmmh
+ movfp xmml, ufp1, dataSize=8
+ movfp xmmh, ufp2, dataSize=8
+};
+
+# PSHUFB, register/memory form (riprel addressing).
+# Same sequence as the sib memory form, except that the two 8-byte selector
+# loads use riprel addressing and are preceded by rdip.
+def macroop PSHUFB_XMM_P {
+ # NOTE(review): presumably t7 is the base register that riprel addressing
+ # consumes, which is why rdip must come first -- confirm.
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ # Both shuffles read the still-unmodified xmml/xmmh pair; the results are
+ # staged in ufp1/ufp2 and only written back afterwards.
+ shuffleb ufp1, xmml, xmmh
+ shuffleb ufp2, xmml, xmmh
+ movfp xmml, ufp1, dataSize=8
+ movfp xmmh, ufp2, dataSize=8
+};
+'''
\ No newline at end of file
diff --git a/src/arch/x86/isa/microops/mediaop.isa
b/src/arch/x86/isa/microops/mediaop.isa
index bf5fc67..124ea2d 100644
--- a/src/arch/x86/isa/microops/mediaop.isa
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -368,6 +368,48 @@
FpDestReg_uqw = result;
'''
+ class shuffleb(MediaOp):
+     # Byte-granular shuffle microop used to build PSHUFB.
+     #
+     # On entry the destination register holds eight selector bytes. Result
+     # byte i is chosen by selector byte i:
+     #   * bit 7 set    -> result byte is 0
+     #   * bit 7 clear  -> bits 3:0 index one of 16 data bytes; indices 0-7
+     #                     come from src1, indices 8-15 from src2.
+     # Selector bits 4-6 are ignored, as PSHUFB specifies. The selector in
+     # the destination register is overwritten with the shuffled result.
+     def __init__(self, dest, src1, src2):
+         # NOTE(review): the trailing 8 is MediaOp's fourth positional
+         # argument, presumably the operand size in bytes -- confirm.
+         super(shuffleb, self).__init__(dest, src1, src2, 8)
+     op_class = 'SimdMiscOp'
+     code = '''
+         const int sizeBits = 8;
+         const int items = 8;
+         const int options = 16;
+         const int optionBits = 8;
+
+         uint64_t result = 0;
+         uint64_t sel = FpDestReg_uqw;
+
+         for (int i = 0; i < items; i++) {
+             // Stays zero when the selector's top bit requests a cleared byte.
+             uint64_t resBits = 0;
+             uint8_t lsel = sel & mask(optionBits);
+
+             if ((lsel & 0x80) == 0) {
+                 // Only the low four bits select a source byte. Without
+                 // this mask a selector such as 0x20 would request a bit
+                 // range far beyond bit 63 of the 64-bit source halves.
+                 lsel &= options - 1;
+
+                 if (lsel >= options / 2) {
+                     // Indices 8-15 address the second source register.
+                     lsel -= options / 2;
+                     resBits = bits(FpSrcReg2_uqw,
+                                    (lsel + 1) * sizeBits - 1,
+                                    (lsel + 0) * sizeBits);
+                 } else {
+                     resBits = bits(FpSrcReg1_uqw,
+                                    (lsel + 1) * sizeBits - 1,
+                                    (lsel + 0) * sizeBits);
+                 }
+             }
+
+             // Advance to the selector byte for the next result position.
+             sel >>= optionBits;
+
+             int hiIndex = (i + 1) * sizeBits - 1;
+             int loIndex = (i + 0) * sizeBits;
+             result = insertBits(result, hiIndex, loIndex, resBits);
+         }
+
+         FpDestReg_uqw = result;
+     '''
+
class Unpack(MediaOp):
op_class = 'SimdMiscOp'
code = '''
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/39495
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I9398f9ecb26b6aabf4015e0e285fdc2f4c2487dd
Gerrit-Change-Number: 39495
Gerrit-PatchSet: 1
Gerrit-Owner: Tong Shen <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s