The patch below adds haddps support.

It is quite complicated, I'm not sure if there is a better way to access 
32-bit chunks of the xmm registers.

attached is a small test program that tests the instruction.

Vince


# HG changeset patch
# User Vince Weaver <[email protected]>
# Date 1257285316 18000
# Node ID aef69bb302b5c60a74d4f53ede04058c262ea018
# Parent  0e5037cecaf776e18a6be727981a33144f4bde64
add support for X86 sse3 haddps instruction

This patch adds support for the sse3 haddps instruction.

The code ends up being fairly complicated, though I'm not sure
if it can be done in a more compact way.

This instruction is used by the vpr spec2k benchmark.

diff -r 0e5037cecaf7 -r aef69bb302b5 
src/arch/x86/isa/decoder/two_byte_opcodes.isa
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa     Fri Oct 30 12:49:37 
2009 -0400
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa     Tue Nov 03 16:55:16 
2009 -0500
@@ -715,7 +715,7 @@
                     }
                     // repne (0xF2)
                     0x8: decode OPCODE_OP_BOTTOM3 {
-                        0x4: WarnUnimpl::haddps_Vo_Wo();
+                        0x4: HADDPS(Vo,Wo);
                         0x5: WarnUnimpl::hsubps_Vo_Wo();
                         default: UD2();
                     }
diff -r 0e5037cecaf7 -r aef69bb302b5 
src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
--- 
a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
   Fri Oct 30 12:49:37 2009 -0400
+++ 
b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/horizontal_addition.py
   Tue Nov 03 16:55:16 2009 -0500
@@ -54,7 +54,116 @@
 # Authors: Gabe Black
 
 microcode = '''
-# HADDPS
+# HADDPS dest, src: horizontal add of packed single-precision floats.
+#
+# With 32-bit lanes numbered from the least significant end:
+#   dest[0] = dest[0] + dest[1]
+#   dest[1] = dest[2] + dest[3]
+#   dest[2] = src[0] + src[1]
+#   dest[3] = src[2] + src[3]
+#
+# xmml/xmmh are the low/high 64-bit halves of dest; xmmlm/xmmhm are the
+# halves of a register source. Each half is reduced to one 32-bit sum by
+# copying it twice, shifting one copy right by 32 so the upper float lands
+# in bits 31:0, and adding the two copies with maddf size=4, ext=1.
+# ext=1 is assumed to be the scalar form that writes only bits 31:0 of its
+# destination and keeps the rest (TODO: confirm against the media microop
+# definitions). That is what lets the first sum be moved up to bits 63:32
+# with mslli before the second sum is written below it.
+#
+# Every variant finishes its loads before it writes xmml or xmmh, so a
+# faulting load cannot leave dest partially updated.
+
+def macroop HADDPS_XMM_XMM {
+    # ufp3 = { xmmh[1] + xmmh[0], xmml[1] + xmml[0] } -> new xmml
+    movfp ufp1, xmmh
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmh
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+    mslli ufp3, ufp3, 32, size=8, ext=0
+
+    movfp ufp1, xmml
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmml
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+
+    # ufp4 = { xmmhm[1] + xmmhm[0], xmmlm[1] + xmmlm[0] } -> new xmmh
+    movfp ufp1, xmmhm
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmhm
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+    mslli ufp4, ufp4, 32, size=8, ext=0
+
+    movfp ufp1, xmmlm
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmlm
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+
+    # Write back only after both halves of dest have been read.
+    movfp xmml, ufp3
+    movfp xmmh, ufp4
+};
+
+def macroop HADDPS_XMM_M {
+    # Reduce the memory operand first (high qword, then low qword) into ufp4,
+    # so both loads complete before any destination half is overwritten.
+    ldfp ufp1, seg, sib, "DISPLACEMENT+8", dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+    mslli ufp4, ufp4, 32, size=8, ext=0
+
+    ldfp ufp1, seg, sib, disp, dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+
+    # ufp3 = { xmmh[1] + xmmh[0], xmml[1] + xmml[0] } -> new xmml
+    movfp ufp1, xmmh
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmh
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+    mslli ufp3, ufp3, 32, size=8, ext=0
+
+    movfp ufp1, xmml
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmml
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+
+    movfp xmml, ufp3
+    movfp xmmh, ufp4
+};
+
+def macroop HADDPS_XMM_P {
+    rdip t7
+    # Same sequence as HADDPS_XMM_M, but with RIP-relative addressing; the
+    # adds use size=4 so the data is treated as floats rather than doubles.
+    ldfp ufp1, seg, riprel, "DISPLACEMENT+8", dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+    mslli ufp4, ufp4, 32, size=8, ext=0
+
+    ldfp ufp1, seg, riprel, disp, dataSize=8
+    movfp ufp2, ufp1
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    maddf ufp4, ufp1, ufp2, size=4, ext=1
+
+    # ufp3 = { xmmh[1] + xmmh[0], xmml[1] + xmml[0] } -> new xmml
+    movfp ufp1, xmmh
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmmh
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+    mslli ufp3, ufp3, 32, size=8, ext=0
+
+    movfp ufp1, xmml
+    msrli ufp1, ufp1, 32, size=8, ext=0
+    movfp ufp2, xmml
+    maddf ufp3, ufp1, ufp2, size=4, ext=1
+
+    movfp xmml, ufp3
+    movfp xmmh, ufp4
+};
 
 def macroop HADDPD_XMM_XMM {
     maddf ufp1, xmmh , xmml, size=8, ext=1

Attachment: haddps
Description: Binary data

_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to