More results:
I get the same strange behaviour when I compile with -march=pentium2. I
could narrow it down to lib/GL/mesa/src/drv/radeon/radeon_state.c. Then
I split radeon_state.c and put only one non-static function at a time in
a new radeon_state2.c along with the necessary static functions. Only
radeon_state2 was compiled with -march=athlon.
I could reproduce the error with only radeonUpdateScissor and
radeonUpdateViewportOffset in radeon_state2.c. On the other hand I don't
get the error if I compile everything with -march=athlon except
radeon_state2.c which means that I've neatly isolated the problem :)
I had gcc generate assembler output for radeon_state2.c with
-mcpu=athlon and -march=athlon. A diff of the two assembler files and
radeon_state2.c (for comparing line numbers) are attached. The version
compiled with -march=athlon uses %mm0, the other one doesn't. My guess
is that some other part of the radeon driver or Mesa makes assumptions
about the MMX state which are not true when compiling with
-march=athlon.
I tried disabling MesaUse3DNow and MesaUseMMX in host.def, but that
didn't help.
Regards,
Felix
On Sat, 12 Oct 2002 18:57:45 +0200
Felix Kühling <[EMAIL PROTECTED]> wrote:
> Hello,
>
> I've looked into my gcc-3.2 problem again and found out that gcc-3.2
> with -march=athlon produces the problem I described in every detail in a
> previous mail (wirebox example).
>
> What made this so tedious to find (the problem was appearing and
> disappearing arbitrarily) is an inconsistency in the Makefiles. When I
> run a global make from xc/xc it uses the optimization options I
> specified in host.def (-O2 -march=athlon). When I run make locally in a
> subdirectory (I tried lib/GL/mesa/src/drv[/radeon]) it uses only -O2.
>
> I looked into the local Makefile and found that the definition of CFLAGS
> appears twice in the Makefile. The first one specifies -O2 -march=athlon
> in CDEBUGFLAGS, the second one specifies only -O2. So in effect a local
> make uses only -O2. When doing a global make, CDEBUGFLAGS is specified on
> the make command line for all subdirectories in xc/xc/xmakefile:
>
> for i in $(SUBDIRS) ;\
> do \
> echo "making" all "in $(CURRENT_DIR)/$$i..."; \
> $(MAKE) -C $$i $(MFLAGS) $(PARALLELMFLAGS) CDEBUGFLAGS="$(CDEBUGFLAGS)"
> all; \
> done
>
> This overrides the variable definitions of CDEBUGFLAGS in all
> subdirectories.
>
> Now we still have to find the exact cause of the problem with
> -march=athlon. First of all, can anyone reproduce it?
__\|/__ ___ ___ ___
__Tschüß_______\_6 6_/___/__ \___/__ \___/___\___You can do anything,___
_____Felix_______\�/\ \_____\ \_____\ \______U___just not everything____
[EMAIL PROTECTED] >o<__/ \___/ \___/ at the same time!
--- radeon_state2_noathlon.s 2002-10-13 01:11:30.000000000 +0200
+++ radeon_state2_athlon.s 2002-10-13 01:04:03.000000000 +0200
@@ -183,17 +183,17 @@
.loc 1 83 0
movl %eax, -16(%ebp)
.loc 1 85 0
- movl 12760(%ebx), %edi
+ movl 12760(%ebx), %esi
.loc 1 86 0
- fildl 28(%edi)
+ fildl 28(%esi)
fstps -24(%ebp)
.loc 1 87 0
- fildl 32(%edi)
+ fildl 32(%esi)
.loc 1 86 0
movl -24(%ebp), %ecx
.loc 1 87 0
fsts -24(%ebp)
- fildl 40(%edi)
+ fildl 40(%esi)
faddp %st, %st(1)
fsts -24(%ebp)
.loc 1 88 0
@@ -212,25 +212,23 @@
.loc 1 93 0
movl 216(%ebx), %edx
.loc 1 91 0
- movl -24(%ebp), %esi
+ movd -24(%ebp), %mm0
.loc 1 93 0
fildl 8(%edx)
movl %ecx, -24(%ebp)
flds -24(%ebp)
fxch %st(1)
- fucompp
- fnstsw %ax
- andb $69, %ah
- xorb $64, %ah
+ fucomip %st(1), %st
+ fstp %st(0)
jne .L14
+ jp .L14
fildl 16(%edx)
- movl %esi, -24(%ebp)
+ movd %mm0, -24(%ebp)
flds -24(%ebp)
fxch %st(1)
- fucompp
- fnstsw %ax
- andb $69, %ah
- cmpb $64, %ah
+ fucomip %st(1), %st
+ fstp %st(0)
+ jp .L14
je .L13
.L14:
.loc 1 105 0
@@ -240,7 +238,7 @@
.LBE6:
movl %ecx, 8(%edx)
.loc 1 100 0
- movl %esi, 16(%edx)
+ movd %mm0, 16(%edx)
.loc 1 111 0
.LBB7:
movl $31, %edx
@@ -248,18 +246,18 @@
.loc 1 105 0
movl %eax, -20(%ebp)
.loc 1 107 0
- movl 4(%eax), %esi
+ movl 4(%eax), %edi
.loc 1 111 0
- movl 28(%edi), %eax
+ movl 28(%esi), %eax
decl %eax
.loc 1 107 0
- andl $-7968, %esi
+ andl $-7968, %edi
.loc 1 111 0
andl $31, %eax
subl %eax, %ecx
.loc 1 112 0
- movl 40(%edi), %eax
- addl 32(%edi), %eax
+ movl 40(%esi), %eax
+ addl 32(%esi), %eax
decl %eax
andl $31, %eax
subl %eax, %edx
@@ -269,14 +267,14 @@
.loc 1 118 0
movl -20(%ebp), %edx
.loc 1 115 0
- orl %ecx, %esi
+ orl %ecx, %edi
.loc 1 118 0
- cmpl %esi, 4(%edx)
+ cmpl %edi, 4(%edx)
je .L13
.loc 1 119 0
movl 2408(%ebx), %eax
testl %eax, %eax
- jne .L22
+ jne .L23
.L17:
movl 260(%ebx), %edx
movl 264(%ebx), %eax
@@ -292,7 +290,7 @@
movl %eax, 4(%edx)
.loc 1 120 0
movl -20(%ebp), %edx
- movl %esi, 4(%edx)
+ movl %edi, 4(%edx)
.L13:
.loc 1 125 0
.LBE7:
@@ -305,7 +303,7 @@
leave
jmp radeonUpdateScissor
.p2align 6,,7
-.L22:
+.L23:
.LBB8:
movl %ebx, (%esp)
call *2408(%ebx)
@@ -62303,7 +62301,7 @@
.byte 0x55
.long 0x134f2
.byte 0x1
- .byte 0x57
+ .byte 0x56
.uleb128 0x37
.long .LC241
.byte 0x1
@@ -62353,7 +62351,7 @@
.byte 0x69
.long 0xf82a
.byte 0x1
- .byte 0x56
+ .byte 0x57
.byte 0x0
.byte 0x0
.uleb128 0x3c
/* $XFree86$ */
/*
* Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Gareth Hughes <[EMAIL PROTECTED]>
* Keith Whitwell <[EMAIL PROTECTED]>
*/
#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_tcl.h"
#include "radeon_tex.h"
#include "radeon_swtcl.h"
#include "radeon_vtxfmt.h"
#include "mem.h"
#include "mmath.h"
#include "enums.h"
#include "colormac.h"
#include "light.h"
#include "api_arrayelt.h"
#include "swrast/swrast.h"
#include "array_cache/acache.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"
/* Small consecutive integer identifiers (0..5); none of them is referenced
 * in the part of the file shown here.
 * NOTE(review): presumably slot indices for the model/projection,
 * model, inverse-transpose model and per-unit texture matrices -- confirm
 * against the code that uses them.
 */
#define MODEL_PROJ 0
#define MODEL 1
#define MODEL_IT 2
#define TEXMAT_0 3
#define TEXMAT_1 4
#define TEXMAT_2 5
/* Translate the GL scissor box of `ctx` into the driver's scissor
 * rectangle, using the position and height of the current drawable, and
 * then ask the driver to recalculate its scissor rects.
 *
 * The Y coordinates are computed as (drawable height - GL Y), i.e. the
 * vertical axis is flipped relative to ctx->Scissor.  Nothing happens
 * when the context has no drawable.
 */
static void radeonUpdateScissor( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
   int left, top, right, bottom;

   if ( !dPriv )
      return;

   /* Inclusive box edges relative to the drawable origin. */
   left   = ctx->Scissor.X;
   top    = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
   right  = ctx->Scissor.X + ctx->Scissor.Width - 1;
   bottom = dPriv->h - ctx->Scissor.Y - 1;

   /* Offset by the drawable position; x2/y2 are one past the last pixel. */
   rmesa->state.scissor.rect.x1 = left + dPriv->x;
   rmesa->state.scissor.rect.y1 = top + dPriv->y;
   rmesa->state.scissor.rect.x2 = right + dPriv->x + 1;
   rmesa->state.scissor.rect.y2 = bottom + dPriv->y + 1;

   radeonRecalcScissorRects( rmesa );
}
/* Recompute the viewport X/Y offset command words and the polygon stipple
 * screen offset from the current drawable position, then refresh the
 * scissor rectangle via radeonUpdateScissor().
 *
 * The viewport offsets are floats that are stored in the command buffer
 * as raw 32-bit words.  Two things are done differently from a naive
 * implementation:
 *
 *  - The "did it change?" test compares bit patterns.  Comparing the
 *    stored word against a GLfloat directly would convert the word's
 *    integer value to float first, so the test would practically always
 *    report a change.
 *
 *  - The float -> word conversion goes through a union instead of
 *    `*(GLuint *)&f`.  The pointer cast violates the C aliasing rules and
 *    is undefined behaviour, which makes the result depend on compiler
 *    version and optimisation flags.
 *
 * The drawable pointer is dereferenced unconditionally; callers must make
 * sure the context has a drawable.
 */
void radeonUpdateViewportOffset( GLcontext *ctx )
{
   /* Float value and its raw bits; assumes GLfloat and GLuint have the
    * same size, as the original pointer cast did.
    */
   union { GLfloat f; GLuint ui; } tx, ty;

   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
   GLfloat xoffset = (GLfloat)dPriv->x;
   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
   const GLfloat *v = ctx->Viewport._WindowMap.m;

   tx.f = v[MAT_TX] + xoffset;
   ty.f = (- v[MAT_TY]) + yoffset;

   if ( (GLuint)rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui ||
        (GLuint)rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui )
   {
      /* Note: this should also modify whatever data the context reset
       * code uses...
       *
       * NOTE(review): unlike the msc update below, no RADEON_STATECHANGE
       * is issued for vpt before these writes -- confirm that this is
       * intentional.
       */
      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui;
      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui;

      /* update polygon stipple x/y screen offset */
      {
         GLuint stx, sty;
         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];

         m &= ~(RADEON_STIPPLE_X_OFFSET_MASK |
                RADEON_STIPPLE_Y_OFFSET_MASK);

         /* add magic offsets, then invert */
         stx = 31 - ((dPriv->x - 1) & RADEON_STIPPLE_COORD_MASK);
         sty = 31 - ((dPriv->y + dPriv->h - 1)
                     & RADEON_STIPPLE_COORD_MASK);

         m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
               (sty << RADEON_STIPPLE_Y_OFFSET_SHIFT));

         /* Only flag the msc state as changed when the word differs. */
         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
            RADEON_STATECHANGE( rmesa, msc );
            rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
         }
      }
   }

   radeonUpdateScissor( ctx );
}