Søren Sandmann Pedersen <sandm...@cs.au.dk> writes: > The changes to x86 are the most involved. There is now a > pixman_cpuid() function that uses inline assembly on GCC and the > cpuid__ intrinsic on MSVC. The assembly is written such that it will > work on both 32 and 64 bit; the main change required was the save %ebx > in %esi instead of on the stack.
I have pushed all the other changes, but saving %ebx in %esi is broken because on x86-64 writing to a 32 bit register zeroes the upper 32 bits of the corresponding 64 bit register. I can't think of a way to save the value of a 32 bit register that both works on x86-32 and doesn't lose the upper 32 bits on x86-64, so in the new version below, there are some #ifdefs to deal with this issue. Søren From 0037cbb84440e5cc6e64ea1c7b95ad7e80c21dd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <s...@redhat.com> Date: Thu, 28 Jun 2012 15:53:14 -0400 Subject: [PATCH] Cleanups and simplifications in x86 CPU feature detection A new function pixman_cpuid() is added that runs the cpuid instruction and returns the results. On GCC this function uses inline assembly that is written such that it will work on both 32 and 64 bit. Compared to the old code, the only difference is %ebx is saved in %esi instead of on the stack. Saving 32 bit registers on a 64 bit stack is difficult or impossible because in 64 bit mode, the push and pop instructions work on 64 bit registers. On MSVC, the function calls the __cpuid intrinsic. There is also a new function called have_cpuid() which detects whether cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on x86-32 bit, it checks whether the 22nd bit of eflags can be modified. On MSVC this does have the consequence that pixman will no longer work on CPUs without cpuid (i.e., older than 486 and some 486 models). These two functions together make it possible to write a generic detect_cpu_features() in plain C. This function is then used in a new have_feature() function that checks whether a specific set of feature bits is available. Aside from the cleanups and simplifications, the main benefit from this patch is that pixman now can do feature detection on x86-64, so that newer instruction sets such as SSSE3 and SSE4.1 can be used. 
(And apparently the assumption that x86-64 CPUs always have MMX and SSE2 is no longer correct: Knight's Corner is x86-64, but doesn't have them). V2: Rename the constants in the getisax() code, as pointed out by Alan Coopersmith. Also reinstate the result variable and initialize features to 0. V3: Fixes for the fact that the upper 32 bits of a 64 bit register are zeroed whenever the corresponding 32 bit register is written to. --- pixman/pixman-x86.c | 347 +++++++++++++++++++++++---------------------------- 1 file changed, 157 insertions(+), 190 deletions(-) diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c index 52ad3df..3c7bc91 100644 --- a/pixman/pixman-x86.c +++ b/pixman/pixman-x86.c @@ -32,249 +32,216 @@ * that would lead to SIGILL instructions on old CPUs that don't have * it. */ -#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) - -#ifdef HAVE_GETISAX -#include <sys/auxv.h> -#endif typedef enum { - NO_FEATURES = 0, - MMX = 0x1, - MMX_EXTENSIONS = 0x2, - SSE = 0x6, - SSE2 = 0x8, - CMOV = 0x10 + X86_MMX = (1 << 0), + X86_MMX_EXTENSIONS = (1 << 1), + X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS, + X86_SSE2 = (1 << 3), + X86_CMOV = (1 << 4) } cpu_features_t; +#ifdef HAVE_GETISAX -static unsigned int +#include <sys/auxv.h> + +static cpu_features_t detect_cpu_features (void) { - unsigned int features = 0; + cpu_features_t features = 0; unsigned int result = 0; - -#ifdef HAVE_GETISAX + if (getisax (&result, 1)) { if (result & AV_386_CMOV) - features |= CMOV; + features |= X86_CMOV; if (result & AV_386_MMX) - features |= MMX; + features |= X86_MMX; if (result & AV_386_AMD_MMX) - features |= MMX_EXTENSIONS; + features |= X86_MMX_EXTENSIONS; if (result & AV_386_SSE) - features |= SSE; + features |= X86_SSE; if (result & AV_386_SSE2) - features |= SSE2; + features |= X86_SSE2; } + + return features; +} + +#else + +#define _PIXMAN_X86_64 \ + (defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64)) + +static pixman_bool_t +have_cpuid (void) +{ 
+#if _PIXMAN_X86_64 || defined (_MSC_VER) + + return TRUE; + +#elif defined (__GNUC__) + uint32_t result; + + __asm__ volatile ( + "pushf" "\n\t" + "pop %%eax" "\n\t" + "mov %%eax, %%ecx" "\n\t" + "xor $0x00200000, %%eax" "\n\t" + "push %%eax" "\n\t" + "popf" "\n\t" + "pushf" "\n\t" + "pop %%eax" "\n\t" + "xor %%ecx, %%eax" "\n\t" + "mov %%eax, %0" "\n\t" + : "=r" (result) + : + : "%eax", "%ecx"); + + return !!result; + #else - char vendor[13]; -#ifdef _MSC_VER - int vendor0 = 0, vendor1, vendor2; #endif - vendor[0] = 0; - vendor[12] = 0; - -#ifdef __GNUC__ +} + +static void +pixman_cpuid (uint32_t feature, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) +{ /* see p. 118 of amd64 instruction set manual Vol3 */ - /* We need to be careful about the handling of %ebx and - * %esp here. We can't declare either one as clobbered + /* On x86-32 we need to be careful about the handling of %ebx + * and %esp here. We can't declare either one as clobbered * since they are special registers (%ebx is the "PIC * register" holding an offset to global data, %esp the - * stack pointer), so we need to make sure they have their - * original values when we access the output operands. + * stack pointer), so we need to make sure that %ebx is + * preserved, and that %esp has its original value when + * accessing the output operands. + * + * On x86-64, writing to a 32-bit register zeroes the + * upper 32 bits of the corresponding 64 bit + * register, so we can't just save %ebx in some other + * register and restore it. 
*/ - __asm__ ( - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1f\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" - : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) - : - : "%eax", "%ecx", "%edx" - ); - +#if defined (__GNUC__) + __asm__ volatile ( + "mov %4, %%eax" "\n\t" +#if !_PIXMAN_X86_64 + "mov %%ebx, %%esi" "\n\t" +#endif + "cpuid" "\n\t" + "mov %%eax, %0" "\n\t" + "mov %%ebx, %1" "\n\t" + "mov %%ecx, %2" "\n\t" + "mov %%edx, %3" "\n\t" +#if !_PIXMAN_X86_64 + "mov %%esi, %%ebx" "\n\t" +#endif + : "=m" (*a), "=m" (*b), "=m" (*c), "=m" (*d) + : "r" (feature) +#if !_PIXMAN_X86_64 + : "%eax", "%esi", "%ecx", "%edx" +#else + : "%rax", "%rbx", "%rcx", "%rdx" +#endif + ); #elif defined (_MSC_VER) - - _asm { - pushfd - pop eax - mov ecx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ecx - jz nocpuid - - mov eax, 0 - push ebx - cpuid - mov eax, ebx - pop ebx - mov vendor0, eax - mov vendor1, edx - mov vendor2, ecx - mov eax, 1 - push ebx - cpuid - pop ebx - nocpuid: - mov result, edx - } - memmove (vendor + 0, &vendor0, 4); - memmove (vendor + 4, &vendor1, 4); - memmove (vendor + 8, &vendor2, 4); - + int info[4]; + + __cpuid (info, feature); + + *a = info[0]; + *b = info[1]; + *c = info[2]; + *d = info[3]; #else -# error unsupported compiler +#error Unknown compiler #endif - - features = 0; - if (result) +} + +static cpu_features_t +detect_cpu_features (void) +{ + uint32_t a, b, c, d; + cpu_features_t features = 0; + + if (!have_cpuid()) + return features; + + /* Get feature bits */ + pixman_cpuid (0x01, &a, &b, &c, &d); + if (d & (1 << 15)) + features 
|= X86_CMOV; + if (d & (1 << 23)) + features |= X86_MMX; + if (d & (1 << 25)) + features |= X86_SSE; + if (d & (1 << 26)) + features |= X86_SSE2; + + /* Check for AMD specific features */ + if ((features & X86_MMX) && !(features & X86_SSE)) { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - if ((features & MMX) && !(features & SSE) && - (strcmp (vendor, "AuthenticAMD") == 0 || - strcmp (vendor, "Geode by NSC") == 0)) + char vendor[13]; + + /* Get vendor string */ + memset (vendor, 0, sizeof vendor); + + pixman_cpuid (0x00, &a, &b, &c, &d); + memcpy (vendor + 0, &b, 4); + memcpy (vendor + 4, &d, 4); + memcpy (vendor + 8, &c, 4); + + if (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "Geode by NSC") == 0) { - /* check for AMD MMX extensions */ -#ifdef __GNUC__ - __asm__ ( - " push %%ebx\n" - " mov $0x80000000, %%eax\n" - " cpuid\n" - " xor %%edx, %%edx\n" - " cmp $0x1, %%eax\n" - " jge 2f\n" - " mov $0x80000001, %%eax\n" - " cpuid\n" - "2:\n" - " pop %%ebx\n" - " mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined _MSC_VER - _asm { - push ebx - mov eax, 80000000h - cpuid - xor edx, edx - cmp eax, 1 - jge notamd - mov eax, 80000001h - cpuid - notamd: - pop ebx - mov result, edx - } -#endif - if (result & (1 << 22)) - features |= MMX_EXTENSIONS; + pixman_cpuid (0x80000000, &a, &b, &c, &d); + if (a >= 0x80000001) + { + pixman_cpuid (0x80000001, &a, &b, &c, &d); + + if (d & (1 << 22)) + features |= X86_MMX_EXTENSIONS; + } } } -#endif /* HAVE_GETISAX */ - + return features; } -#ifdef USE_X86_MMX -static pixman_bool_t -pixman_have_mmx (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t mmx_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - mmx_present = (features & (MMX | 
MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); - initialized = TRUE; - } - - return mmx_present; -} #endif -#ifdef USE_SSE2 static pixman_bool_t -pixman_have_sse2 (void) +have_feature (cpu_features_t feature) { - static pixman_bool_t initialized = FALSE; - static pixman_bool_t sse2_present; - + static pixman_bool_t initialized; + static cpu_features_t features; + if (!initialized) { - unsigned int features = detect_cpu_features (); - sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); + features = detect_cpu_features(); initialized = TRUE; } - - return sse2_present; -} -#endif - -#else /* __amd64__ */ -#ifdef USE_X86_MMX -#define pixman_have_mmx() TRUE -#endif -#ifdef USE_SSE2 -#define pixman_have_sse2() TRUE -#endif -#endif /* __amd64__ */ + return (features & feature) == feature; +} #endif pixman_implementation_t * _pixman_x86_get_implementations (pixman_implementation_t *imp) { +#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) +#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) + #ifdef USE_X86_MMX - if (!_pixman_disabled ("mmx") && pixman_have_mmx()) + if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) imp = _pixman_implementation_create_mmx (imp); #endif #ifdef USE_SSE2 - if (!_pixman_disabled ("sse2") && pixman_have_sse2()) + if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS)) imp = _pixman_implementation_create_sse2 (imp); #endif -- 1.7.10.4 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman