> Here is my first patch for the Nouveau project: a fix for 3D software
> rendering using the SSE2 instruction set.
>
> The problem is that Gallium doesn't save/restore the registers it uses
> (eax, edx, ecx, esi in my case). So I added push/pop in
> tgsi_emit_sse2().

 Doesn't the ABI say that those are scratch registers?  That is, they
 don't need to be saved by the callee.

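On Linux, the calling convention is "cdecl". Marcheu told me that EAX, ECX and EDX can be used freely as scratch registers, but not ESI (ESI must be saved and restored by the callee). So here is a smaller patch: it only saves/restores ESI ("temp base"). Because the extra push moves the stack pointer, every get_argument() index is increased by one.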

About the crash: it occurs with the Nouveau driver (yesterday's Git version) and the Neverball game. The bug only occurs with gcc 4.2, not with gcc 4.1. Gallium (Mesa) is compiled with -O (-O1).

Victor Stinner
PS: I just subscribed to the mailing list
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index a4503c1..f11f9c6 100644
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 4e80597..2d4e707 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1998,6 +1998,9 @@ emit_instruction(
 
    case TGSI_OPCODE_RET:
    case TGSI_OPCODE_END:
+       emit_pop(
+          func,
+          get_temp_base() );
 #ifdef WIN32
       emit_retw( func, 16 );
 #else
@@ -2248,22 +2251,26 @@ tgsi_emit_sse2(
 
    func->csr = func->store;
 
+   emit_push(
+      func,
+      get_temp_base() );
+
    emit_mov(
       func,
       get_input_base(),
-      get_argument( 0 ) );
+      get_argument( 0+1 ) );
    emit_mov(
       func,
       get_output_base(),
-      get_argument( 1 ) );
+      get_argument( 1+1 ) );
    emit_mov(
       func,
       get_const_base(),
-      get_argument( 2 ) );
+      get_argument( 2+1 ) );
    emit_mov(
       func,
       get_temp_base(),
-      get_argument( 3 ) );
+      get_argument( 3+1 ) );
 
    tgsi_parse_init( &parse, tokens );
 
@@ -2327,22 +2334,26 @@ tgsi_emit_sse2_fs(
    func->csr = func->store;
 
    /* DECLARATION phase, do not load output argument. */
+   emit_push(
+      func,
+      get_temp_base() );
+
    emit_mov(
       func,
       get_input_base(),
-      get_argument( 0 ) );
+      get_argument( 0+1 ) );
    emit_mov(
       func,
       get_const_base(),
-      get_argument( 2 ) );
+      get_argument( 2+1 ) );
    emit_mov(
       func,
       get_temp_base(),
-      get_argument( 3 ) );
+      get_argument( 3+1 ) );
    emit_mov(
       func,
       get_coef_base(),
-      get_argument( 4 ) );
+      get_argument( 4+1 ) );
 
    tgsi_parse_init( &parse, tokens );
 
-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

Reply via email to