Hello,
somehow something got omitted, probably on my side.
[as I can tell from the end of the patch, since there are references
to removed code...]
I'd like to send more cleanups if you are interested ;-)
Regards,
Petr
Btw: Unfortunately, I have noticed that the current SSE code tends to
be a little slower than the x86 path, at least on my XP2000+... not very
much, but still. Maybe this is caused by the AMD SSE implementation
or, more likely, by the fact that we don't smartly precache data while we can.
Dieter Nützel wrote:
Am Dienstag, 31. Dezember 2002 04:20 schrieb Petr Sebor:
Hi,
I have recently rewritten the Mesa CPU detection code and
would like someone else to give it a try.
Basically, the highlights are:
- added _mesa_x86_has_cpuid
- added _mesa_x86_cpuid
- added _mesa_x86_cpuid_eax
- added _mesa_x86_cpuid_ebx
- added _mesa_x86_cpuid_ecx
- added _mesa_x86_cpuid_edx
- removed _mesa_identify_x86_cpu_features
- differentiated extended cpu features (in ..x86_features.h)
from the std ones
- changed the X86_FEATURE semantics a little bit
With these tools, I am trying to parse the cpu features while
making a distinction between the normal and extended cpu
feature sets - no need to query the cpu vendor string, while
making the code highly readable for people not familiar with
assembly language.
Please consider applying...
I vote for it.
Petr, with your cleanup and Linus' asm fix MesaCVS (5.1) works without a glitch
for me. Next I'll try it with the DRI CVS trunk.
Done.
Works fine.
Mesa/demos> ./glinfo
cpu vendor: AuthenticAMD
cpu name: AMD Athlon(tm) MP 1900+
MMX cpu detected.
3DNow! cpu detected.
Testing OS support for SSE... yes.
Testing OS support for SSE unmasked exceptions... SIGFPE, yes.
Tests of OS support for SSE passed.
SSE cpu detected.
r200AgeTextures 0
r200AgeTextures 1
GL_VERSION: 1.2 Mesa 5.0
GL_EXTENSIONS: GL_ARB_imaging GL_ARB_multitexture GL_ARB_texture_border_clamp
GL_ARB_texture_cube_map GL_ARB_texture_env_add GL_ARB_texture_env_combine
GL_ARB_texture_env_dot3 GL_ARB_texture_mirrored_repeat
GL_ARB_transpose_matrix GL_ARB_window_pos GL_ATI_texture_mirror_once
GL_EXT_abgr GL_EXT_bgra GL_EXT_blend_color GL_EXT_blend_logic_op
GL_EXT_blend_minmax GL_EXT_blend_subtract GL_EXT_clip_volume_hint
GL_EXT_convolution GL_EXT_compiled_vertex_array GL_EXT_histogram
GL_EXT_packed_pixels GL_EXT_polygon_offset GL_EXT_rescale_normal
GL_EXT_secondary_color GL_EXT_stencil_wrap GL_EXT_texture3D
GL_EXT_texture_edge_clamp GL_EXT_texture_env_add GL_EXT_texture_env_combine
GL_EXT_texture_env_dot3 GL_EXT_texture_filter_anisotropic
GL_EXT_texture_object GL_EXT_texture_lod_bias GL_EXT_vertex_array
GL_IBM_rasterpos_clip GL_MESA_pack_invert GL_MESA_ycbcr_texture
GL_MESA_window_pos GL_NV_texture_rectangle GL_NV_texgen_reflection
GL_SGI_color_matrix GL_SGI_color_table
GL_RENDERER: Mesa DRI R200 20021125 AGP 4x x86/MMX/3DNow!/SSE TCL
GL_VENDOR: Tungsten Graphics, Inc.
GLU_VERSION: 1.3
GLU_EXTENSIONS: GLU_EXT_nurbs_tessellator GLU_EXT_object_space_tess
GLUT_API_VERSION: 5
GLUT_XLIB_IMPLEMENTATION: 15
Thank you!
Dieter
------------------------------------------------------------------------
Index: common_x86.c
===================================================================
RCS file: /cvsroot/mesa3d/Mesa/src/X86/common_x86.c,v
retrieving revision 1.20
diff -u -r1.20 common_x86.c
--- common_x86.c 13 Nov 2002 15:03:31 -0000 1.20
+++ common_x86.c 31 Dec 2002 03:02:45 -0000
@@ -52,8 +52,14 @@
/* No reason for this to be public.
*/
-extern int _mesa_identify_x86_cpu_features( void );
+extern int _mesa_identify_x86_cpu_features(void);
+extern GLuint _mesa_x86_has_cpuid(void);
+extern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
+extern GLuint _mesa_x86_cpuid_eax(GLuint op);
+extern GLuint _mesa_x86_cpuid_ebx(GLuint op);
+extern GLuint _mesa_x86_cpuid_ecx(GLuint op);
+extern GLuint _mesa_x86_cpuid_edx(GLuint op);
static void message( const char *msg )
{
@@ -240,8 +246,84 @@
{
(void) message; /* silence warning */
#ifdef USE_X86_ASM
- _mesa_x86_cpu_features = _mesa_identify_x86_cpu_features();
+ _mesa_x86_cpu_features = 0;
+ if (!_mesa_x86_has_cpuid()) {
+ message("CPUID not detected");
+ }
+ else {
+ GLuint cpu_features;
+ GLuint cpu_ext_features;
+ GLuint cpu_ext_info;
+ char cpu_vendor[13];
+ GLuint result;
+
+ /* get vendor name */
+ _mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
+ cpu_vendor[12] = '\0';
+
+ message("cpu vendor: ");
+ message(cpu_vendor);
+ message("\n");
+
+ /* get cpu features */
+ cpu_features = _mesa_x86_cpuid_edx(1);
+
+ if (cpu_features & X86_CPU_FPU)
+ _mesa_x86_cpu_features |= X86_FEATURE_FPU;
+ if (cpu_features & X86_CPU_CMOV)
+ _mesa_x86_cpu_features |= X86_FEATURE_CMOV;
+
+#ifdef USE_MMX_ASM
+ if (cpu_features & X86_CPU_MMX)
+ _mesa_x86_cpu_features |= X86_FEATURE_MMX;
+#endif
+
+#ifdef USE_SSE_ASM
+ if (cpu_features & X86_CPU_XMM)
+ _mesa_x86_cpu_features |= X86_FEATURE_XMM;
+ if (cpu_features & X86_CPU_XMM2)
+ _mesa_x86_cpu_features |= X86_FEATURE_XMM2;
+#endif
+
+ /* query extended cpu features */
+ if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
+ if (cpu_ext_info >= 0x80000001) {
+
+ cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);
+
+ if (cpu_features & X86_CPU_MMX) {
+
+#ifdef USE_3DNOW_ASM
+ if (cpu_ext_features & X86_CPUEXT_3DNOW)
+ _mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
+ if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
+ _mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
+#endif
+
+#ifdef USE_MMX_ASM
+ if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
+ _mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
+#endif
+ }
+ }
+
+ /* query cpu name */
+ if (cpu_ext_info >= 0x80000002) {
+ GLuint ofs;
+ char cpu_name[49];
+ for (ofs = 0; ofs < 3; ofs++)
+ _mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
+ cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */
+
+ message("cpu name: ");
+ message(cpu_name);
+ message("\n");
+ }
+ }
+
+ }
+
if ( getenv( "MESA_NO_ASM" ) ) {
_mesa_x86_cpu_features = 0;
}
Index: common_x86_asm.S
===================================================================
RCS file: /cvsroot/mesa3d/Mesa/src/X86/common_x86_asm.S,v
retrieving revision 1.13
diff -u -r1.13 common_x86_asm.S
--- common_x86_asm.S 9 Nov 2002 17:43:58 -0000 1.13
+++ common_x86_asm.S 31 Dec 2002 03:02:45 -0000
@@ -43,36 +43,11 @@
#include "matypes.h"
#include "common_x86_features.h"
-
-/* Intel vendor string
- */
-#define GENU 0x756e6547 /* "Genu" */
-#define INEI 0x49656e69 /* "ineI" */
-#define NTEL 0x6c65746e /* "ntel" */
-
-/* AMD vendor string
- */
-#define AUTH 0x68747541 /* "Auth" */
-#define ENTI 0x69746e65 /* "enti" */
-#define CAMD 0x444d4163 /* "cAMD" */
-
-
- SEG_DATA
-
-/* We might want to print out some useful messages.
- */
-GLNAME( found_intel ): STRING( "Genuine Intel processor found\n\0" )
-GLNAME( found_amd ): STRING( "Authentic AMD processor found\n\0" )
-
-
SEG_TEXT
ALIGNTEXT4
-GLOBL GLNAME( _mesa_identify_x86_cpu_features )
-GLNAME( _mesa_identify_x86_cpu_features ):
-
- PUSH_L ( EBX )
- PUSH_L ( ESI )
+GLOBL GLNAME( _mesa_x86_has_cpuid )
+GLNAME( _mesa_x86_has_cpuid ):
/* Test for the CPUID command. If the ID Flag bit in EFLAGS
* (bit 21) is writable, the CPUID command is present.
@@ -89,94 +64,113 @@
/* Verify the ID Flag bit has been written.
*/
CMP_L ( ECX, EAX )
- JZ ( LLBL (cpuid_done) )
+ SETNE ( AL )
+ XOR_L ( CONST(0xff), EAX )
- /* Get the CPU vendor info.
- */
- XOR_L ( EAX, EAX )
- CPUID
+ RET
- /* Test for Intel processors. We must look for the
- * "GenuineIntel" string in EBX, ECX and EDX.
- */
- CMP_L ( CONST(GENU), EBX )
- JNE ( LLBL(cpuid_amd) )
- CMP_L ( CONST(INEI), EDX )
- JNE ( LLBL(cpuid_amd) )
- CMP_L ( CONST(NTEL), ECX )
- JNE ( LLBL(cpuid_amd) )
- /* We have an Intel processor, so we can get the feature
- * information with an CPUID input value of 1.
- */
- MOV_L ( CONST(0x1), EAX )
- CPUID
- MOV_L ( EDX, EAX )
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_x86_cpuid )
+GLNAME( _mesa_x86_cpuid ):
- /* Mask out highest bit, which is used by AMD for 3dnow
- * Newer Intel have this bit set, but do not support 3dnow
- */
- AND_L ( CONST(0X7FFFFFFF), EAX)
- JMP ( LLBL(cpuid_done) )
+ MOV_L ( REGOFF(4, ESP), EAX ) /* cpuid op */
-LLBL(cpuid_amd):
+ PUSH_L ( EDI )
+ PUSH_L ( EBX )
+ PUSH_L ( ECX )
+ PUSH_L ( EDX )
- /* Test for AMD processors. We must look for the
- * "AuthenticAMD" string in EBX, ECX and EDX.
- */
- CMP_L ( CONST(AUTH), EBX )
- JNE ( LLBL(cpuid_other) )
- CMP_L ( CONST(ENTI), EDX )
- JNE ( LLBL(cpuid_other) )
- CMP_L ( CONST(CAMD), ECX )
- JNE ( LLBL(cpuid_other) )
-
- /* We have an AMD processor, so we can get the feature
- * information after we verify that the extended functions are
- * supported.
- */
- /* The features we need are almost all in the extended set. The
- * exception is SSE enable, which is in the standard set (0x1).
- */
- MOV_L ( CONST(0x1), EAX )
CPUID
- TEST_L ( EAX, EAX )
- JZ ( LLBL (cpuid_failed) )
- MOV_L ( EDX, ESI )
- MOV_L ( CONST(0x80000000), EAX )
+ MOV_L ( REGOFF(24, ESP), EDI ) /* *eax */
+ MOV_L ( EAX, REGIND(EDI) )
+ MOV_L ( REGOFF(28, ESP), EDI ) /* *ebx */
+ MOV_L ( EBX, REGIND(EDI) )
+ MOV_L ( REGOFF(32, ESP), EDI ) /* *ecx */
+ MOV_L ( ECX, REGIND(EDI) )
+ MOV_L ( REGOFF(36, ESP), EDI ) /* *edx */
+ MOV_L ( EDX, REGIND(EDI) )
+
+ POP_L ( EDX )
+ POP_L ( ECX )
+ POP_L ( EBX )
+ POP_L ( EDI )
+ RET
+
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_x86_cpuid_eax )
+GLNAME( _mesa_x86_cpuid_eax ):
+
+ MOV_L ( REGOFF(4, ESP), EAX ) /* cpuid op */
+
+ PUSH_L ( EBX )
+ PUSH_L ( ECX )
+ PUSH_L ( EDX )
+
CPUID
- TEST_L ( EAX, EAX )
- JZ ( LLBL (cpuid_failed) )
- MOV_L ( CONST(0x80000001), EAX )
+ POP_L ( EDX )
+ POP_L ( ECX )
+ POP_L ( EBX )
+ RET
+
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_x86_cpuid_ebx )
+GLNAME( _mesa_x86_cpuid_ebx ):
+
+ MOV_L ( REGOFF(4, ESP), EAX ) /* cpuid op */
+
+ PUSH_L ( EBX )
+ PUSH_L ( ECX )
+ PUSH_L ( EDX )
+
CPUID
- MOV_L ( EDX, EAX )
-
- AND_L ( CONST(0x02000000), ESI ) /* OR in the SSE bit */
- OR_L ( ESI, EAX )
-
- JMP ( LLBL (cpuid_done) )
+ MOV_L ( EBX, EAX ) /* return EBX */
-LLBL(cpuid_other):
+ POP_L ( EDX )
+ POP_L ( ECX )
+ POP_L ( EBX )
- /* Test for other processors here when required.
- */
+ RET
-LLBL(cpuid_failed):
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_x86_cpuid_ecx )
+GLNAME( _mesa_x86_cpuid_ecx ):
- /* If we can't determine the feature information, we must
- * return zero to indicate that no platform-specific
- * optimizations can be used.
- */
- MOV_L ( CONST(0), EAX )
+ MOV_L ( REGOFF(4, ESP), EAX ) /* cpuid op */
+
+ PUSH_L ( EBX )
+ PUSH_L ( ECX )
+ PUSH_L ( EDX )
-LLBL (cpuid_done):
+ CPUID
+ MOV_L ( ECX, EAX ) /* return ECX */
- POP_L ( ESI )
+ POP_L ( EDX )
+ POP_L ( ECX )
POP_L ( EBX )
+
RET
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_x86_cpuid_edx )
+GLNAME( _mesa_x86_cpuid_edx ):
+
+ MOV_L ( REGOFF(4, ESP), EAX ) /* cpuid op */
+
+ PUSH_L ( EBX )
+ PUSH_L ( ECX )
+ PUSH_L ( EDX )
+
+ CPUID
+ MOV_L ( EDX, EAX ) /* return EDX */
+
+ POP_L ( EDX )
+ POP_L ( ECX )
+ POP_L ( EBX )
+
+ RET
#ifdef USE_SSE_ASM
/* Execute an SSE instruction to see if the operating system correctly
Index: common_x86_features.h
===================================================================
RCS file: /cvsroot/mesa3d/Mesa/src/X86/common_x86_features.h,v
retrieving revision 1.5
diff -u -r1.5 common_x86_features.h
--- common_x86_features.h 29 Oct 2002 20:28:57 -0000 1.5
+++ common_x86_features.h 31 Dec 2002 03:02:46 -0000
@@ -37,38 +37,25 @@
/* Capabilities of CPUs
*/
-#define X86_FEATURE_FPU 0x00000001
-#define X86_FEATURE_VME 0x00000002
-#define X86_FEATURE_DE 0x00000004
-#define X86_FEATURE_PSE 0x00000008
-#define X86_FEATURE_TSC 0x00000010
-#define X86_FEATURE_MSR 0x00000020
-#define X86_FEATURE_PAE 0x00000040
-#define X86_FEATURE_MCE 0x00000080
-#define X86_FEATURE_CX8 0x00000100
-#define X86_FEATURE_APIC 0x00000200
-#define X86_FEATURE_10 0x00000400
-#define X86_FEATURE_SEP 0x00000800
-#define X86_FEATURE_MTRR 0x00001000
-#define X86_FEATURE_PGE 0x00002000
-#define X86_FEATURE_MCA 0x00004000
-#define X86_FEATURE_CMOV 0x00008000
-#define X86_FEATURE_PAT 0x00010000
-#define X86_FEATURE_PSE36 0x00020000
-#define X86_FEATURE_18 0x00040000
-#define X86_FEATURE_19 0x00080000
-#define X86_FEATURE_20 0x00100000
-#define X86_FEATURE_21 0x00200000
-#define X86_FEATURE_MMXEXT 0x00400000
-#define X86_FEATURE_MMX 0x00800000
-#define X86_FEATURE_FXSR 0x01000000
-#define X86_FEATURE_XMM 0x02000000
-#define X86_FEATURE_XMM2 0x04000000
-#define X86_FEATURE_27 0x08000000
-#define X86_FEATURE_28 0x10000000
-#define X86_FEATURE_29 0x20000000
-#define X86_FEATURE_3DNOWEXT 0x40000000
-#define X86_FEATURE_3DNOW 0x80000000
+#define X86_FEATURE_FPU (1<<0)
+#define X86_FEATURE_CMOV (1<<1)
+#define X86_FEATURE_MMXEXT (1<<2)
+#define X86_FEATURE_MMX (1<<3)
+#define X86_FEATURE_FXSR (1<<4)
+#define X86_FEATURE_XMM (1<<5)
+#define X86_FEATURE_XMM2 (1<<6)
+#define X86_FEATURE_3DNOWEXT (1<<7)
+#define X86_FEATURE_3DNOW (1<<8)
+
+#define X86_CPU_FPU (1<<0)
+#define X86_CPU_CMOV (1<<15)
+#define X86_CPU_MMX (1<<23)
+#define X86_CPU_XMM (1<<25)
+#define X86_CPU_XMM2 (1<<26)
+
+#define X86_CPUEXT_MMX_EXT (1<<22)
+#define X86_CPUEXT_3DNOW_EXT (1<<30)
+#define X86_CPUEXT_3DNOW (1<<31)
#define cpu_has_mmx (_mesa_x86_cpu_features & X86_FEATURE_MMX)
#define cpu_has_mmxext (_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)
-------------------------------------------------------
This SF.NET email is sponsored by:
SourceForge Enterprise Edition + IBM + LinuxWorld = Something 2 See!
http://www.vasoftware.com
_______________________________________________
Dri-devel mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/dri-devel