Re: [m5-dev] [patch] add support for X86 sse3 haddps instruction

Gabe Black Fri, 06 Nov 2009 00:11:46 -0800

You seem to be missing the majority of the PC-relative version... Would 
you like to fix that up, or should I?


Gabe

Gabe Black wrote:
> I debated adding an hadd microop or adding a flag that changed the 
> behavior of maddf, but in the end I didn't do either since I didn't have 
> a ready way to test any implementation of hadd. Between those two I'd 
> probably go with the hadd microop since maddf might end up overly 
> complicated and hard to use. I think it would be reasonable (but not 
> necessarily the right thing to do) to have an hadd microop since that 
> might be something the SSE pipeline knew how to do directly. Since there 
> isn't one of those currently and this will get the instruction to work, 
> I think I'll commit your patch. In the future, I'd like to handle both 
> this and the wide shifts more at the individual microop level since I'd 
> imagine that better approximates the performance of real hardware. If 
> anyone knows if that's true and is allowed to tell us, please do.
>
> Gabe
>
> Vince Weaver wrote:
>   
>> The patch below adds haddps support.
>>
>> It is quite complicated, I'm not sure if there is a better way to access 
>> 32-bit chunks of the xmm registers.
>>
>> Attached is a small test program that tests the instruction.
>>
>> Vince
>>
>>
>> # HG changeset patch
>> # User Vince Weaver <[email protected]>
>> # Date 1257285316 18000
>> # Node ID aef69bb302b5c60a74d4f53ede04058c262ea018
>> # Parent  0e5037cecaf776e18a6be727981a33144f4bde64
>> add support for X86 sse3 haddps instruction
>>
>> This patch adds support for the sse3 haddps instruction.
>>
>> The code ends up being fairly complicated, though I'm not sure
>> if it can be done in a more compact way.
>>
>> This instruction is used by the vpr spec2k benchmark.
>>
>> diff -r 0e5037cecaf7 -r aef69bb302b5 src/arch/x86/isa/decoder/two_byte_opcodes.isa
>> --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa  Fri Oct 30 12:49:37 2009 -0400
>> +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa  Tue Nov 03 16:55:16 2009 -0500
>> @@ -715,7 +715,7 @@
>>                      }
>>                      // repne (0xF2)
>>                      0x8: decode OPCODE_OP_BOTTOM3 {
>> -                        0x4: WarnUnimpl::haddps_Vo_Wo();
>> +                        0x4: HADDPS(Vo,Wo);
>>                          0x5: WarnUnimpl::hsubps_Vo_Wo();
>>                          default: UD2();
>>                      }
>> diff -r 0e5037cecaf7 -r aef69bb302b5 src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
>> --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py  Fri Oct 30 12:49:37 2009 -0400
>> +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py  Tue Nov 03 16:55:16 2009 -0500
>> @@ -54,7 +54,70 @@
>>  # Authors: Gabe Black
>>  
>>  microcode = '''
>> -# HADDPS
>> +def macroop HADDPS_XMM_XMM {
>> +    movfp ufp1, xmmh
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    movfp ufp2, xmmh
>> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
>> +    mslli ufp3, ufp3, 32, size=8, ext=0
>> +
>> +    movfp ufp1, xmml
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    movfp ufp2, xmml
>> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
>> +
>> +
>> +
>> +    movfp ufp1, xmmhm
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    movfp ufp2, xmmhm
>> +    maddf ufp4, ufp1, ufp2, size=4, ext=1
>> +    mslli ufp4, ufp4, 32, size=8, ext=0
>> +
>> +    movfp ufp1, xmmlm
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    movfp ufp2, xmmlm
>> +    maddf ufp4, ufp1, ufp2, size=4, ext=1
>> +
>> +    movfp xmml, ufp3
>> +    movfp xmmh, ufp4
>> +};
>> +
>> +def macroop HADDPS_XMM_M {
>> +    movfp ufp1, xmmh
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    movfp ufp2, xmmh
>> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
>> +    mslli ufp3, ufp3, 32, size=8, ext=0
>> +
>> +    movfp ufp1, xmml
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    movfp ufp2, xmml
>> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
>> +
>> +    movfp xmml, ufp3
>> +
>> +    ldfp ufp1, seg, sib, "DISPLACEMENT+8", dataSize=8
>> +    movfp ufp2, ufp1
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
>> +    mslli ufp3, ufp3, 32, size=8, ext=0
>> +
>> +    ldfp ufp1, seg, sib, disp, dataSize=8
>> +    movfp ufp2, ufp1
>> +    msrli ufp1, ufp1, 32, size=8, ext=0
>> +    maddf ufp3, ufp1, ufp2, size=4, ext=1
>> +
>> +    movfp xmmh, ufp3
>> +};
>> +
>> +def macroop HADDPS_XMM_P {
>> +    rdip t7
>> +    ldfp ufp1, seg, riprel, disp, dataSize=8
>> +    ldfp ufp2, seg, riprel, "DISPLACEMENT+8", dataSize=8
>> +    maddf xmml, xmmh, xmml, size=8, ext=1
>> +    maddf xmmh, ufp1, ufp2, size=8, ext=1
>> +};
>>  
>>  def macroop HADDPD_XMM_XMM {
>>      maddf ufp1, xmmh , xmml, size=8, ext=1
>> ------------------------------------------------------------------------
>>
>> _______________________________________________
>> m5-dev mailing list
>> [email protected]
>> http://m5sim.org/mailman/listinfo/m5-dev
>>   
>>     
>
> _______________________________________________
> m5-dev mailing list
> [email protected]
> http://m5sim.org/mailman/listinfo/m5-dev
>   

_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Re: [m5-dev] [patch] add support for X86 sse3 haddps instruction

Reply via email to