From: Alex Smith <[email protected]>

Prevents binaries built with the Intel compiler (ICC) from selecting the
slowest code path on non-Intel CPUs. Taken from x264 and adapted for use with Libav.

Signed-off-by: Derek Buitenhuis <[email protected]>
---
Changes:
- Both authors have agreed to relicense to LGPL.
- Line length fixes.
- Commented #endif.
---
 libavutil/x86/cpu.c     | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 libavutil/x86/cpu.h     |  1 +
 libavutil/x86/cpuid.asm | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+)

diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 3b36fd0..e702a6b 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -199,3 +199,50 @@ int ff_get_cpu_flags_x86(void)
 
     return rval;
 }
+
+#if HAVE_MMX
+#ifdef __INTEL_COMPILER
+/* Agner's patch to Intel's CPU dispatcher from pages 131-132 of
+ * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30),
+ * copied from x264 and adapted to Libav's CPU flag scheme. */
+
+/* Global CPU level indicator; set by ff_intel_cpu_indicator_init() below */
+int __intel_cpu_indicator = 0;
+
+/* Set __intel_cpu_indicator from the CPU flags that Libav detected. */
+void ff_intel_cpu_indicator_init(void)
+{
+    unsigned int cpu = ff_get_cpu_flags_x86();
+    if (cpu & AV_CPU_FLAG_AVX) /* checks run top-down; first match wins */
+        __intel_cpu_indicator = 0x20000; /* values: presumably Agner's table */
+    else if (cpu & AV_CPU_FLAG_SSE42)
+        __intel_cpu_indicator = 0x8000;
+    else if (cpu & AV_CPU_FLAG_SSE4)
+        __intel_cpu_indicator = 0x2000;
+    else if (cpu & AV_CPU_FLAG_SSSE3)
+        __intel_cpu_indicator = 0x1000;
+    else if (cpu & AV_CPU_FLAG_SSE3)
+        __intel_cpu_indicator = 0x800;
+    else if (cpu & AV_CPU_FLAG_SSE2 && !(cpu & AV_CPU_FLAG_SSE2SLOW))
+        __intel_cpu_indicator = 0x200; /* skipped if SSE2 is flagged slow */
+    else if (cpu & AV_CPU_FLAG_SSE)
+        __intel_cpu_indicator = 0x80;
+    else if (cpu & AV_CPU_FLAG_MMXEXT)
+        __intel_cpu_indicator = 8;
+    else
+        __intel_cpu_indicator = 1; /* fallback when no branch above matched */
+}
+
+/* __intel_cpu_indicator_init appears to have a non-standard calling convention
+ * whose callers expect registers to stay intact, so we'll route it through an
+ * asm function that saves and restores the registers around the real work. */
+void __intel_cpu_indicator_init(void)
+{
+    ff_safe_intel_cpu_indicator_init();
+}
+#else
+void ff_intel_cpu_indicator_init(void)
+{ /* empty non-ICC stub; presumably satisfies the cextern in cpuid.asm */
+}
+#endif /* __INTEL_COMPILER */
+#endif /* HAVE_MMX */
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index e4f6f0b..62122f4 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -57,5 +57,6 @@
 void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx);
 void ff_cpu_xgetbv(int op, int *eax, int *edx);
 int  ff_cpu_cpuid_test(void);
+void ff_safe_intel_cpu_indicator_init(void);
 
 #endif /* AVUTIL_X86_CPU_H */
diff --git a/libavutil/x86/cpuid.asm b/libavutil/x86/cpuid.asm
index e739ebe..0f84a92 100644
--- a/libavutil/x86/cpuid.asm
+++ b/libavutil/x86/cpuid.asm
@@ -89,3 +89,53 @@ cglobal cpu_cpuid_test
     popfd
     ret
 %endif
+
+cextern intel_cpu_indicator_init
+
+;-----------------------------------------------------------------------------
+; void ff_safe_intel_cpu_indicator_init(void)
+;-----------------------------------------------------------------------------
+cglobal safe_intel_cpu_indicator_init
+    push r0                 ; save r0-r6 so the caller sees them unchanged
+    push r1
+    push r2
+    push r3
+    push r4
+    push r5
+    push r6
+%if ARCH_X86_64
+    push r7                 ; x86-64 only: also save r7-r14
+    push r8
+    push r9
+    push r10
+    push r11
+    push r12
+    push r13
+    push r14
+%endif
+    push rbp
+    mov  rbp, rsp           ; keep the unaligned rsp so "leave" can restore it
+%if WIN64
+    sub  rsp, 32 ; shadow space
+%endif
+    and  rsp, ~31           ; round rsp down to a 32-byte boundary
+    call intel_cpu_indicator_init
+    leave                   ; rsp = rbp, then pop rbp: undoes the realignment
+%if ARCH_X86_64
+    pop r14                 ; restore in the reverse of the push order
+    pop r13
+    pop r12
+    pop r11
+    pop r10
+    pop r9
+    pop r8
+    pop r7
+%endif
+    pop r6
+    pop r5
+    pop r4
+    pop r3
+    pop r2
+    pop r1
+    pop r0
+    ret
-- 
1.8.3.1

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to