You seem to be missing the majority of the PC-relative version (HADDPS_XMM_P)... Would you like to fix that up, or should I?
Gabe Gabe Black wrote: > I debated adding an hadd microop or adding a flag that changed the > behavior of maddf, but in the end I didn't do either since I didn't have > a ready way to test any implementation of hadd. Between those two I'd > probably go with the hadd microop since maddf might end up overly > complicated and hard to use. I think it would be reasonable (but not > necessarily the right thing to do) to have an hadd microop since that > might be something the SSE pipeline knew how to do directly. Since there > isn't one of those currently and this will get the instruction to work, > I think I'll commit your patch. In the future, I'd like to handle both > this and the wide shifts more at the individual microop level since I'd > imagine that better approximates the performance of real hardware. If > anyone knows if that's true and is allowed to tell us, please do. > > Gabe > > Vince Weaver wrote: > >> The patch below adds haddps support. >> >> It is quite complicated, I'm not sure if there is a better way to access >> 32-bit chunks of the xmm registers. >> >> attached is a small test program that tests the instruction. >> >> Vince >> >> >> # HG changeset patch >> # User Vince Weaver <[email protected]> >> # Date 1257285316 18000 >> # Node ID aef69bb302b5c60a74d4f53ede04058c262ea018 >> # Parent 0e5037cecaf776e18a6be727981a33144f4bde64 >> add support for X86 sse3 haddps instruction >> >> This patch adds support for the sse3 haddps instruction. >> >> The code ends up being fairly complicated, though I'm not sure >> if it can be done in a more compact way. >> >> This instruction is used by the vpr spec2k benchmark. 
>> >> diff -r 0e5037cecaf7 -r aef69bb302b5 >> src/arch/x86/isa/decoder/two_byte_opcodes.isa >> --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa Fri Oct 30 12:49:37 >> 2009 -0400 >> +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa Tue Nov 03 16:55:16 >> 2009 -0500 >> @@ -715,7 +715,7 @@ >> } >> // repne (0xF2) >> 0x8: decode OPCODE_OP_BOTTOM3 { >> - 0x4: WarnUnimpl::haddps_Vo_Wo(); >> + 0x4: HADDPS(Vo,Wo); >> 0x5: WarnUnimpl::hsubps_Vo_Wo(); >> default: UD2(); >> } >> diff -r 0e5037cecaf7 -r aef69bb302b5 >> src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py >> --- >> a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py >> Fri Oct 30 12:49:37 2009 -0400 >> +++ >> b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py >> Tue Nov 03 16:55:16 2009 -0500 >> @@ -54,7 +54,70 @@ >> # Authors: Gabe Black >> >> microcode = ''' >> -# HADDPS >> +def macroop HADDPS_XMM_XMM { >> + movfp ufp1, xmmh >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + movfp ufp2, xmmh >> + maddf ufp3, ufp1, ufp2, size=4, ext=1 >> + mslli ufp3, ufp3, 32, size=8, ext=0 >> + >> + movfp ufp1, xmml >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + movfp ufp2, xmml >> + maddf ufp3, ufp1, ufp2, size=4, ext=1 >> + >> + >> + >> + movfp ufp1, xmmhm >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + movfp ufp2, xmmhm >> + maddf ufp4, ufp1, ufp2, size=4, ext=1 >> + mslli ufp4, ufp4, 32, size=8, ext=0 >> + >> + movfp ufp1, xmmlm >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + movfp ufp2, xmmlm >> + maddf ufp4, ufp1, ufp2, size=4, ext=1 >> + >> + movfp xmml, ufp3 >> + movfp xmmh, ufp4 >> +}; >> + >> +def macroop HADDPS_XMM_M { >> + movfp ufp1, xmmh >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + movfp ufp2, xmmh >> + maddf ufp3, ufp1, ufp2, size=4, ext=1 >> + mslli ufp3, ufp3, 32, size=8, ext=0 >> + >> + movfp ufp1, xmml >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + movfp ufp2, xmml >> + maddf ufp3, ufp1, ufp2, size=4, ext=1 >> + >> + movfp 
xmml, ufp3 >> + >> + ldfp ufp1, seg, sib, "DISPLACEMENT+8", dataSize=8 >> + movfp ufp2, ufp1 >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + maddf ufp3, ufp1, ufp2, size=4, ext=1 >> + mslli ufp3, ufp3, 32, size=8, ext=0 >> + >> + ldfp ufp1, seg, sib, disp, dataSize=8 >> + movfp ufp2, ufp1 >> + msrli ufp1, ufp1, 32, size=8, ext=0 >> + maddf ufp3, ufp1, ufp2, size=4, ext=1 >> + >> + movfp xmmh, ufp3 >> +}; >> + >> +def macroop HADDPS_XMM_P { >> + rdip t7 >> + ldfp ufp1, seg, riprel, disp, dataSize=8 >> + ldfp ufp2, seg, riprel, "DISPLACEMENT+8", dataSize=8 >> + maddf xmml, xmmh, xmml, size=8, ext=1 >> + maddf xmmh, ufp1, ufp2, size=8, ext=1 >> +}; >> >> def macroop HADDPD_XMM_XMM { >> maddf ufp1, xmmh , xmml, size=8, ext=1 >> ------------------------------------------------------------------------ >> >> _______________________________________________ >> m5-dev mailing list >> [email protected] >> http://m5sim.org/mailman/listinfo/m5-dev >> >> > > _______________________________________________ > m5-dev mailing list > [email protected] > http://m5sim.org/mailman/listinfo/m5-dev > _______________________________________________ m5-dev mailing list [email protected] http://m5sim.org/mailman/listinfo/m5-dev
