Hello, I was doing some more work trying to get spec2k running on x86_64.
Below is a patch where I implemented movdqu/movdqa. This is enough (in conjunction with some other syscall patches) to get many of the benchmarks working. I'm not sure if doing the 16-byte loads in two pieces is the right way to go. Is the proper way to extend the uop code to handle 16-byte instructions natively? The gcc benchmark wants the (currently unimplemented on m5) pslldq and psrldq instructions (which do shifts on 128-bit values). I couldn't figure out a good way to do that with the current uop setup, though it would be trivial to do if 16-byte sized operations were supported. Vince # HG changeset patch # User Vince Weaver <[email protected]> # Date 1256146843 14400 # Node ID 2c977ee7f79183736dcfd27d52a9a0b4ced474dd # Parent 85f090fe7563fdc5d3aeb8189f96fb00487fc606 Implement X86 sse2 movdqu and movdqa instructions The movdqa instruction should enforce 16-byte alignment. This implementation does not do that. These instructions are needed for most of x86_64 spec2k to run. diff -r 85f090fe7563 -r 2c977ee7f791 src/arch/x86/isa/decoder/two_byte_opcodes.isa --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa Tue Oct 20 16:48:00 2009 -0400 +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa Wed Oct 21 13:40:43 2009 -0400 @@ -604,7 +604,7 @@ } // repe (0xF3) 0x4: decode OPCODE_OP_BOTTOM3 { - 0x7: WarnUnimpl::movdqu_Vo_Wo(); + 0x7: MOVDQU(Vo,Wo); default: UD2(); } // operand size (0x66) @@ -616,7 +616,7 @@ 0x4: PUNPCKLQDQ(Vo,Wq); 0x5: PUNPCKHQDQ(Vo,Wq); 0x6: WarnUnimpl::movd_Vo_Ed(); - 0x7: WarnUnimpl::movdqa_Vo_Wo(); + 0x7: MOVDQA(Vo,Wo); } default: UD2(); } @@ -702,7 +702,7 @@ // repe (0xF3) 0x4: decode OPCODE_OP_BOTTOM3 { 0x6: MOVQ(Vq,Wq); - 0x7: WarnUnimpl::movdqu_Wo_Vo(); + 0x7: MOVDQU(Wo,Vo); default: UD2(); } // operand size (0x66) @@ -710,7 +710,7 @@ 0x4: WarnUnimpl::haddpd_Vo_Wo(); 0x5: WarnUnimpl::hsubpd_Vo_Wo(); 0x6: WarnUnimpl::movd_Ed_Vd(); - 0x7: WarnUnimpl::movdqa_Wo_Vo(); + 0x7: MOVDQA(Wo,Vo); default: UD2(); } // repne (0xF2) diff -r 85f090fe7563 -r 
2c977ee7f791 src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py Tue Oct 20 16:48:00 2009 -0400 +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py Wed Oct 21 13:40:43 2009 -0400 @@ -87,7 +87,63 @@ movfp xmml, mmxm, dataSize=8 lfpimm xmmh, 0 }; + +def macroop MOVDQA_XMM_XMM { + movfp xmml, xmmlm + movfp xmmh, xmmhm +}; + +def macroop MOVDQA_XMM_M { +# FIXME : alignment exception? + ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVDQA_XMM_P { +# FIXME : alignment exception? + rdip t7 + ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVDQA_M_XMM { +# FIXME : alignment exception? + stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVDQA_P_XMM { +# FIXME : alignment exception? + rdip t7 + stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVDQU_XMM_XMM { + movfp xmml, xmmlm + movfp xmmh, xmmhm +}; + +def macroop MOVDQU_XMM_M { + ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVDQU_XMM_P { + rdip t7 + ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVDQU_M_XMM { + stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVDQU_P_XMM { + rdip t7 + stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; ''' -# MOVDQA -# MOVDQU # LDDQU _______________________________________________ m5-dev mailing list [email protected] http://m5sim.org/mailman/listinfo/m5-dev
