Looking through my email, I found this patch which I don't think ever
got committed. Do you know of any others like this, Vince? I found this
one specifically because I'm getting ready to commit my changes that
clean up the ext flags, and this would need to be adjusted slightly.
The change is relatively minor, so you can go ahead and commit this and
I'll fix it up in my change, or you can wait and I can describe what
you'd need to do.

Gabe

Vince Weaver wrote:
> On Fri, 6 Nov 2009, Gabe Black wrote:
>
>   
>> You seem to be missing the majority of the PC relative version... Would 
>> you like to fix that up, or should I?
>>     
>
> The below version properly has the PC relative version, which has been 
> tested.
>
> Vince
>
> # HG changeset patch
> # User Vince Weaver <[email protected]>
> # Date 1257285316 18000
> # Node ID aef69bb302b5c60a74d4f53ede04058c262ea018
> # Parent  0e5037cecaf776e18a6be727981a33144f4bde64
> add support for X86 sse3 haddps instruction
>
> This patch adds support for the sse3 haddps instruction.
>
> The code ends up being fairly complicated, though I'm not sure
> if it can be done in a more compact way.
>
> This instruction is used by the vpr spec2k benchmark.
>
> diff -r 2e67bb7c9b4c src/arch/x86/isa/decoder/two_byte_opcodes.isa
> --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa   Mon Nov 09 10:02:55 
> 2009 -0500
> +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa   Mon Nov 09 20:55:17 
> 2009 -0500
> @@ -715,7 +715,7 @@
>                      }
>                      // repne (0xF2)
>                      0x8: decode OPCODE_OP_BOTTOM3 {
> -                        0x4: WarnUnimpl::haddps_Vo_Wo();
> +                        0x4: HADDPS(Vo,Wo);
>                          0x5: WarnUnimpl::hsubps_Vo_Wo();
>                          default: UD2();
>                      }
> diff -r 2e67bb7c9b4c 
> src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
> --- 
> a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
>  Mon Nov 09 10:02:55 2009 -0500
> +++ 
> b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
>  Mon Nov 09 20:55:17 2009 -0500
> @@ -54,7 +54,90 @@
>  # Authors: Gabe Black
>  
>  microcode = '''
> -# HADDPS
> +def macroop HADDPS_XMM_XMM {
> +    movfp ufp1, xmmh
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmh
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    movfp ufp1, xmml
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmml
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +    movfp ufp1, xmmhm
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmhm
> +    maddf ufp4, ufp1, ufp2, size=4, ext=1
> +    mslli ufp4, ufp4, 32, size=8, ext=0
> +
> +    movfp ufp1, xmmlm
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmlm
> +    maddf ufp4, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmml, ufp3
> +    movfp xmmh, ufp4
> +};
> +
> +def macroop HADDPS_XMM_M {
> +    movfp ufp1, xmmh
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmh
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    movfp ufp1, xmml
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmml
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmml, ufp3
> +
> +    ldfp ufp1, seg, sib, "DISPLACEMENT+8", dataSize=8
> +    movfp ufp2, ufp1
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    ldfp ufp1, seg, sib, disp, dataSize=8
> +    movfp ufp2, ufp1
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmmh, ufp3
> +};
> +
> +def macroop HADDPS_XMM_P {
> +    rdip t7
> +
> +    movfp ufp1, xmmh
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmmh
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    movfp ufp1, xmml
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    movfp ufp2, xmml
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmml, ufp3
> +
> +    ldfp ufp1, seg, riprel, "DISPLACEMENT+8", dataSize=8
> +    movfp ufp2, ufp1
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +    mslli ufp3, ufp3, 32, size=8, ext=0
> +
> +    ldfp ufp1, seg, riprel, disp, dataSize=8
> +    movfp ufp2, ufp1
> +    msrli ufp1, ufp1, 32, size=8, ext=0
> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
> +
> +    movfp xmmh, ufp3
> +};
>  
>  def macroop HADDPD_XMM_XMM {
>      maddf ufp1, xmmh , xmml, size=8, ext=1
> _______________________________________________
> m5-dev mailing list
> [email protected]
> http://m5sim.org/mailman/listinfo/m5-dev
>   

_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to