Here is the analysis by Paolo Bonzini: I compared crypto/x86_64cpuid.pl and crypto/x86cpuid.pl, and the code in the latter is wrong.
From x86_64cpuid.pl: mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx bt \$27,%r9d # check OSXSAVE bit jnc .Lclear_avx xor %ecx,%ecx # XCR0 .byte 0x0f,0x01,0xd0 # xgetbv and \$6,%eax # isolate XMM and YMM state support cmp \$6,%eax je .Ldone .Lclear_avx: mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) and %eax,%r9d # clear AVX, FMA and AMD XOP bits .Ldone: From x86cpuid.pl: &bt ("ecx",26); # check XSAVE bit &jnc (&label("done")); &bt ("ecx",27); # check OSXSAVE bit &jnc (&label("clear_xmm")); &xor ("ecx","ecx"); &data_byte(0x0f,0x01,0xd0); # xgetbv &and ("eax",6); &cmp ("eax",6); &je (&label("done")); &cmp ("eax",2); &je (&label("clear_avx")); &set_label("clear_xmm"); &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits &and ("esi",0xfeffffff); # clear FXSR &set_label("clear_avx"); &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits &set_label("done"); x86_64cpuid.pl is not completely correct; if bit 1 of EAX was zero (XMM support not enabled in the OS) you would need to clear AESNI and PCLMULQDQ bits as done in x86cpuid.pl. However, in practice it does not matter because any OS new enough to set OSXSAVE will always enable XMM support as well. x86cpuid.pl instead is completely broken: - the whole test is bypassed if XSAVE=0 (the jnc after the bit-26 test jumps straight to "done"), which makes absolutely no sense. x86_64cpuid.pl is right in testing OSXSAVE. - if OSXSAVE=0, all SSE code is disabled, which also makes no sense because any OS less than 10 years old lets you use SSE even if it does not set OSXSAVE (via FXSAVE), and this of course includes RHEL6. The attached patch (unfortunately not yet tested) synchronizes the two tests.
--- crypto/x86cpuid.pl 2011-10-26 17:13:03.599641479 +0200 +++ crypto/x86cpuid.pl 2011-10-26 17:41:04.400262001 +0200 @@ -119,20 +119,13 @@ &mov ("esi","edx"); &or ("ebp","ecx"); # merge AMD XOP flag - &bt ("ecx",26); # check XSAVE bit - &jnc (&label("done")); &bt ("ecx",27); # check OSXSAVE bit - &jnc (&label("clear_xmm")); - &xor ("ecx","ecx"); + &jnc (&label("clear_avx")); + &xor ("ecx","ecx"); # XCR0 &data_byte(0x0f,0x01,0xd0); # xgetbv - &and ("eax",6); + &and ("eax",6); # isolate XMM and YMM state support &cmp ("eax",6); &je (&label("done")); - &cmp ("eax",2); - &je (&label("clear_avx")); -&set_label("clear_xmm"); - &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits - &and ("esi",0xfeffffff); # clear FXSR &set_label("clear_avx"); &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits &set_label("done");