I debated adding an hadd microop or adding a flag that changed the 
behavior of maddf, but in the end I didn't do either since I didn't have 
a ready way to test any implementation of hadd. Between those two I'd 
probably go with the hadd microop since maddf might end up overly 
complicated and hard to use. I think it would be reasonable (but not 
necessarily the right thing to do) to have an hadd microop since that 
might be something the SSE pipeline knew how to do directly. Since there 
isn't one of those currently and this will get the instruction to work, 
I think I'll commit your patch. In the future, I'd like to handle both 
this and the wide shifts more at the individual microop level since I'd 
imagine that better approximates the performance of real hardware. If 
anyone knows whether that's true and is allowed to tell us, please do.

Gabe

Vince Weaver wrote:
> The patch below adds haddps support.
>
> It is quite complicated; I'm not sure if there is a better way to access 
> 32-bit chunks of the xmm registers.
>
> Attached is a small test program that tests the instruction.
>
> Vince
>
>
> # HG changeset patch
> # User Vince Weaver <[email protected]>
> # Date 1257285316 18000
> # Node ID aef69bb302b5c60a74d4f53ede04058c262ea018
> # Parent  0e5037cecaf776e18a6be727981a33144f4bde64
> add support for X86 sse3 haddps instruction
>
> This patch adds support for the sse3 haddps instruction.
>
> The code ends up being fairly complicated, though I'm not sure
> if it can be done in a more compact way.
>
> This instruction is used by the vpr spec2k benchmark.
>
> diff -r 0e5037cecaf7 -r aef69bb302b5 
> src/arch/x86/isa/decoder/two_byte_opcodes.isa
> --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa   Fri Oct 30 12:49:37 
> 2009 -0400
> +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa   Tue Nov 03 16:55:16 
> 2009 -0500
> @@ -715,7 +715,7 @@
>                      }
>                      // repne (0xF2)
>                      0x8: decode OPCODE_OP_BOTTOM3 {
> -                        0x4: WarnUnimpl::haddps_Vo_Wo();
> +                        0x4: HADDPS(Vo,Wo);
>                          0x5: WarnUnimpl::hsubps_Vo_Wo();
>                          default: UD2();
>                      }
> diff -r 0e5037cecaf7 -r aef69bb302b5 
> src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
> --- 
> a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
>  Fri Oct 30 12:49:37 2009 -0400
> +++ 
> b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
>  Tue Nov 03 16:55:16 2009 -0500
> @@ -54,7 +54,70 @@
>  # Authors: Gabe Black
>  
>  microcode = '''
> -# HADDPS
> +def macroop HADDPS_XMM_XMM {
> +    movfp ufp1, xmmh
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmh
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    movfp ufp1, xmml
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmml
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +
> +
> +    movfp ufp1, xmmhm
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmhm
> +    maddf ufp4, ufp1, ufp2, size=4, ext=1
> +    mslli ufp4, ufp4, 32, size=8, ext=0
> +
> +    movfp ufp1, xmmlm
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmlm
> +    maddf ufp4, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmml, ufp3
> +    movfp xmmh, ufp4
> +};
> +
> +def macroop HADDPS_XMM_M {
> +    movfp ufp1, xmmh
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmh
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    movfp ufp1, xmml
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmml
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmml, ufp3
> +
> +    ldfp ufp1, seg, sib, "DISPLACEMENT+8", dataSize=8
> +    movfp ufp2, ufp1
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    ldfp ufp1, seg, sib, disp, dataSize=8
> +    movfp ufp2, ufp1
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmmh, ufp3
> +};
> +
> +def macroop HADDPS_XMM_P {
> +    rdip t7
> +    ldfp ufp1, seg, riprel, disp, dataSize=8
> +    ldfp ufp2, seg, riprel, "DISPLACEMENT+8", dataSize=8
> +    maddf xmml, xmmh, xmml, size=8, ext=1
> +    maddf xmmh, ufp1, ufp2, size=8, ext=1
> +};
>  
>  def macroop HADDPD_XMM_XMM {
>      maddf ufp1, xmmh , xmml, size=8, ext=1
> ------------------------------------------------------------------------
>
> _______________________________________________
> m5-dev mailing list
> [email protected]
> http://m5sim.org/mailman/listinfo/m5-dev
>   

_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to