commit:     90f12d61a417b874e654af1f28271d95ca0e1c6d
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Fri Feb 13 01:35:43 2015 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Fri Feb 13 01:35:43 2015 +0000
URL:        
http://sources.gentoo.org/gitweb/?p=proj/linux-patches.git;a=commit;h=90f12d61

Kernel patch enables gcc >= v4.9 optimizations for additional CPUs.

---
 0000_README                                        |   5 +
 ...-additional-cpu-optimizations-for-gcc-4.9.patch | 387 +++++++++++++++++++++
 2 files changed, 392 insertions(+)

diff --git a/0000_README b/0000_README
index d897d2e..116cf1b 100644
--- a/0000_README
+++ b/0000_README
@@ -114,3 +114,8 @@ Desc:   BFQ v7r7 patch 2 for 3.17: BFQ Scheduler
 Patch:  5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r7-for-3.18.0.patch
 From:   http://algo.ing.unimo.it/people/paolo/disk_sched/
 Desc:   BFQ v7r7 patch 3 for 3.17: Early Queue Merge (EQM)
+
+Patch:  5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
+From:   https://github.com/graysky2/kernel_gcc_patch/
+Desc:   Kernel patch enables gcc >= v4.9 optimizations for additional CPUs.
+

diff --git a/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch 
b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
new file mode 100644
index 0000000..f931f75
--- /dev/null
+++ b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
@@ -0,0 +1,387 @@
+WARNING - this version of the patch works with version 4.9+ of gcc and with
+kernel version 3.15.x+ and should NOT be applied when compiling on older
+versions due to name changes of the flags with the 4.9 release of gcc.
+Use the older version of this patch hosted on the same github for older
+versions of gcc. For example:
+
+corei7 --> nehalem
+corei7-avx --> sandybridge
+core-avx-i --> ivybridge
+core-avx2 --> haswell
+
+For more, see: https://gcc.gnu.org/gcc-4.9/changes.html
+
+It also changes 'atom' to 'bonnell' in accordance with the gcc v4.9 changes.
+Note that upstream is using the deprecated 'match=atom' flags when I believe it
+should use the newer 'march=bonnell' flag for atom processors.
+
+I have made that change to this patch set as well.  See the following kernel
+bug report to see if I'm right: 
https://bugzilla.kernel.org/show_bug.cgi?id=77461
+
+This patch will expand the number of microarchitectures to include new
+processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
+14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
+Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 1.5 Gen Core
+i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7 (Sandybridge), Intel 3rd Gen
+Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core i3/i5/i7 (Haswell), and Intel 5th
+Gen Core i3/i5/i7 (Broadwell). It also offers the compiler the 'native' flag.
+
+Small but real speed increases are measurable using a make endpoint comparing
+a generic kernel to one built with one of the respective microarchs.
+
+See the following experimental evidence supporting this statement:
+https://github.com/graysky2/kernel_gcc_patch
+
+REQUIREMENTS
+linux version >=3.15
+gcc version >=4.9
+
+--- a/arch/x86/include/asm/module.h    2014-08-03 18:25:02.000000000 -0400
++++ b/arch/x86/include/asm/module.h    2014-09-13 09:37:16.721385247 -0400
+@@ -15,6 +15,20 @@
+ #define MODULE_PROC_FAMILY "586MMX "
+ #elif defined CONFIG_MCORE2
+ #define MODULE_PROC_FAMILY "CORE2 "
++#elif defined CONFIG_MNATIVE
++#define MODULE_PROC_FAMILY "NATIVE "
++#elif defined CONFIG_MNEHALEM
++#define MODULE_PROC_FAMILY "NEHALEM "
++#elif defined CONFIG_MWESTMERE
++#define MODULE_PROC_FAMILY "WESTMERE "
++#elif defined CONFIG_MSANDYBRIDGE
++#define MODULE_PROC_FAMILY "SANDYBRIDGE "
++#elif defined CONFIG_MIVYBRIDGE
++#define MODULE_PROC_FAMILY "IVYBRIDGE "
++#elif defined CONFIG_MHASWELL
++#define MODULE_PROC_FAMILY "HASWELL "
++#elif defined CONFIG_MBROADWELL
++#define MODULE_PROC_FAMILY "BROADWELL "
+ #elif defined CONFIG_MATOM
+ #define MODULE_PROC_FAMILY "ATOM "
+ #elif defined CONFIG_M686
+@@ -33,6 +47,20 @@
+ #define MODULE_PROC_FAMILY "K7 "
+ #elif defined CONFIG_MK8
+ #define MODULE_PROC_FAMILY "K8 "
++#elif defined CONFIG_MK8SSE3
++#define MODULE_PROC_FAMILY "K8SSE3 "
++#elif defined CONFIG_MK10
++#define MODULE_PROC_FAMILY "K10 "
++#elif defined CONFIG_MBARCELONA
++#define MODULE_PROC_FAMILY "BARCELONA "
++#elif defined CONFIG_MBOBCAT
++#define MODULE_PROC_FAMILY "BOBCAT "
++#elif defined CONFIG_MBULLDOZER
++#define MODULE_PROC_FAMILY "BULLDOZER "
++#elif defined CONFIG_MPILEDRIVER
++#define MODULE_PROC_FAMILY "PILEDRIVER "
++#elif defined CONFIG_MJAGUAR
++#define MODULE_PROC_FAMILY "JAGUAR "
+ #elif defined CONFIG_MELAN
+ #define MODULE_PROC_FAMILY "ELAN "
+ #elif defined CONFIG_MCRUSOE
+--- a/arch/x86/Kconfig.cpu     2014-08-03 18:25:02.000000000 -0400
++++ b/arch/x86/Kconfig.cpu     2014-09-13 09:37:16.721385247 -0400
+@@ -137,9 +137,8 @@ config MPENTIUM4
+               -Paxville
+               -Dempsey
+ 
+-
+ config MK6
+-      bool "K6/K6-II/K6-III"
++      bool "AMD K6/K6-II/K6-III"
+       depends on X86_32
+       ---help---
+         Select this for an AMD K6-family processor.  Enables use of
+@@ -147,7 +146,7 @@ config MK6
+         flags to GCC.
+ 
+ config MK7
+-      bool "Athlon/Duron/K7"
++      bool "AMD Athlon/Duron/K7"
+       depends on X86_32
+       ---help---
+         Select this for an AMD Athlon K7-family processor.  Enables use of
+@@ -155,12 +154,62 @@ config MK7
+         flags to GCC.
+ 
+ config MK8
+-      bool "Opteron/Athlon64/Hammer/K8"
++      bool "AMD Opteron/Athlon64/Hammer/K8"
+       ---help---
+         Select this for an AMD Opteron or Athlon64 Hammer-family processor.
+         Enables use of some extended instructions, and passes appropriate
+         optimization flags to GCC.
+ 
++config MK8SSE3
++      bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
++      ---help---
++        Select this for improved AMD Opteron or Athlon64 Hammer-family 
processors.
++        Enables use of some extended instructions, and passes appropriate
++        optimization flags to GCC.
++
++config MK10
++      bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
++      ---help---
++        Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
++              Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family 
processor.
++        Enables use of some extended instructions, and passes appropriate
++        optimization flags to GCC.
++
++config MBARCELONA
++      bool "AMD Barcelona"
++      ---help---
++        Select this for AMD Barcelona and newer processors.
++
++        Enables -march=barcelona
++
++config MBOBCAT
++      bool "AMD Bobcat"
++      ---help---
++        Select this for AMD Bobcat processors.
++
++        Enables -march=btver1
++
++config MBULLDOZER
++      bool "AMD Bulldozer"
++      ---help---
++        Select this for AMD Bulldozer processors.
++
++        Enables -march=bdver1
++
++config MPILEDRIVER
++      bool "AMD Piledriver"
++      ---help---
++        Select this for AMD Piledriver processors.
++
++        Enables -march=bdver2
++
++config MJAGUAR
++      bool "AMD Jaguar"
++      ---help---
++        Select this for AMD Jaguar processors.
++
++        Enables -march=btver2
++
+ config MCRUSOE
+       bool "Crusoe"
+       depends on X86_32
+@@ -251,8 +300,17 @@ config MPSC
+         using the cpu family field
+         in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+ 
++config MATOM
++      bool "Intel Atom"
++      ---help---
++
++        Select this for the Intel Atom platform. Intel Atom CPUs have an
++        in-order pipelining architecture and thus can benefit from
++        accordingly optimized code. Use a recent GCC with specific Atom
++        support in order to fully benefit from selecting this option.
++
+ config MCORE2
+-      bool "Core 2/newer Xeon"
++      bool "Intel Core 2"
+       ---help---
+ 
+         Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
+@@ -260,14 +318,55 @@ config MCORE2
+         family in /proc/cpuinfo. Newer ones have 6 and older ones 15
+         (not a typo)
+ 
+-config MATOM
+-      bool "Intel Atom"
++        Enables -march=core2
++
++config MNEHALEM
++      bool "Intel Nehalem"
+       ---help---
+ 
+-        Select this for the Intel Atom platform. Intel Atom CPUs have an
+-        in-order pipelining architecture and thus can benefit from
+-        accordingly optimized code. Use a recent GCC with specific Atom
+-        support in order to fully benefit from selecting this option.
++        Select this for 1st Gen Core processors in the Nehalem family.
++
++        Enables -march=nehalem
++
++config MWESTMERE
++      bool "Intel Westmere"
++      ---help---
++
++        Select this for the Intel Westmere formerly Nehalem-C family.
++
++        Enables -march=westmere
++
++config MSANDYBRIDGE
++      bool "Intel Sandy Bridge"
++      ---help---
++
++        Select this for 2nd Gen Core processors in the Sandy Bridge family.
++
++        Enables -march=sandybridge
++
++config MIVYBRIDGE
++      bool "Intel Ivy Bridge"
++      ---help---
++
++        Select this for 3rd Gen Core processors in the Ivy Bridge family.
++
++        Enables -march=ivybridge
++
++config MHASWELL
++      bool "Intel Haswell"
++      ---help---
++
++        Select this for 4th Gen Core processors in the Haswell family.
++
++        Enables -march=haswell
++
++config MBROADWELL
++      bool "Intel Broadwell"
++      ---help---
++
++        Select this for 5th Gen Core processors in the Broadwell family.
++
++        Enables -march=broadwell
+ 
+ config GENERIC_CPU
+       bool "Generic-x86-64"
+@@ -276,6 +375,19 @@ config GENERIC_CPU
+         Generic x86-64 CPU.
+         Run equally well on all x86-64 CPUs.
+ 
++config MNATIVE
++ bool "Native optimizations autodetected by GCC"
++ ---help---
++
++   GCC 4.2 and above support -march=native, which automatically detects
++   the optimum settings to use based on your processor. -march=native 
++   also detects and applies additional settings beyond -march specific
++   to your CPU, (eg. -msse4). Unless you have a specific reason not to
++   (e.g. distcc cross-compiling), you should probably be using
++   -march=native rather than anything listed below.
++
++   Enables -march=native
++
+ endchoice
+ 
+ config X86_GENERIC
+@@ -300,7 +412,7 @@ config X86_INTERNODE_CACHE_SHIFT
+ config X86_L1_CACHE_SHIFT
+       int
+       default "7" if MPENTIUM4 || MPSC
+-      default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || 
X86_GENERIC || GENERIC_CPU
++      default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT 
|| MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || 
MWESTMERE || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || BROADWELL || MNATIVE || 
MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+       default "4" if MELAN || M486 || MGEODEGX1
+       default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || 
MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || 
M586 || MVIAC3_2 || MGEODE_LX
+ 
+@@ -331,11 +443,11 @@ config X86_ALIGNMENT_16
+ 
+ config X86_INTEL_USERCOPY
+       def_bool y
+-      depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || 
M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
++      depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || 
M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 
|| MBARCELONA || MNEHALEM || MWESTMERE || MSANDYBRIDGE || MIVYBRIDGE || 
MHASWELL || MBROADWELL || MNATIVE
+ 
+ config X86_USE_PPRO_CHECKSUM
+       def_bool y
+-      depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || 
MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 
|| MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
++      depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 
|| MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || 
MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM 
|| MWESTMERE || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MATOM 
|| MNATIVE
+ 
+ config X86_USE_3DNOW
+       def_bool y
+@@ -359,17 +471,17 @@ config X86_P6_NOP
+ 
+ config X86_TSC
+       def_bool y
+-      depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || 
MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX 
|| M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || 
MATOM) || X86_64
++      depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || 
MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX 
|| M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || 
MCORE2 || MNEHALEM || MWESTMERE || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || 
MBROADWELL || MNATIVE || MATOM) || X86_64
+ 
+ config X86_CMPXCHG64
+       def_bool y
+-      depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || 
MPENTIUMIII || MPENTIUMII || M686 || MATOM
++      depends on X86_PAE || X86_64 || MCORE2 || MNEHALEM || MWESTMERE || 
MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM 
|| MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
+ 
+ # this should be set for all -march=.. options where the compiler
+ # generates cmov.
+ config X86_CMOV
+       def_bool y
+-      depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || 
MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON 
|| X86_64 || MATOM || MGEODE_LX)
++      depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || 
MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE 
|| MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || 
MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE 
|| MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
+ 
+ config X86_MINIMUM_CPU_FAMILY
+       int
+--- a/arch/x86/Makefile        2014-08-03 18:25:02.000000000 -0400
++++ b/arch/x86/Makefile        2014-09-13 09:37:16.721385247 -0400
+@@ -92,13 +92,33 @@ else
+       KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
+ 
+         # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
++        cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
+         cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
++        cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8)
++        cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
++        cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
++        cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
++        cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
++        cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
++        cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
+         cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+ 
+         cflags-$(CONFIG_MCORE2) += \
+-                $(call cc-option,-march=core2,$(call 
cc-option,-mtune=generic))
+-      cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
+-              $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
++                $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
++        cflags-$(CONFIG_MNEHALEM) += \
++                $(call cc-option,-march=nehalem,$(call 
cc-option,-mtune=nehalem))
++        cflags-$(CONFIG_MWESTMERE) += \
++                $(call cc-option,-march=westmere,$(call 
cc-option,-mtune=westmere))
++        cflags-$(CONFIG_MSANDYBRIDGE) += \
++                $(call cc-option,-march=sandybridge,$(call 
cc-option,-mtune=sandybridge))
++        cflags-$(CONFIG_MIVYBRIDGE) += \
++                $(call cc-option,-march=ivybridge,$(call 
cc-option,-mtune=ivybridge))
++        cflags-$(CONFIG_MHASWELL) += \
++                $(call cc-option,-march=haswell,$(call 
cc-option,-mtune=haswell))
++        cflags-$(CONFIG_MBROADWELL) += \
++                $(call cc-option,-march=broadwell,$(call 
cc-option,-mtune=broadwell))
++        cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
++                $(call cc-option,-mtune=bonnell,$(call 
cc-option,-mtune=generic))
+         cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+         KBUILD_CFLAGS += $(cflags-y)
+ 
+--- a/arch/x86/Makefile_32.cpu 2014-08-03 18:25:02.000000000 -0400
++++ b/arch/x86/Makefile_32.cpu 2014-09-13 09:37:16.721385247 -0400
+@@ -23,7 +23,15 @@ cflags-$(CONFIG_MK6)                += -march=k6
+ # Please note, that patches that add -march=athlon-xp and friends are 
pointless.
+ # They make zero difference whatsosever to performance at this time.
+ cflags-$(CONFIG_MK7)          += -march=athlon
++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
+ cflags-$(CONFIG_MK8)          += $(call cc-option,-march=k8,-march=athlon)
++cflags-$(CONFIG_MK8SSE3)              += $(call 
cc-option,-march=k8-sse3,-march=athlon)
++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
++cflags-$(CONFIG_MBARCELONA)   += $(call 
cc-option,-march=barcelona,-march=athlon)
++cflags-$(CONFIG_MBOBCAT)      += $(call cc-option,-march=btver1,-march=athlon)
++cflags-$(CONFIG_MBULLDOZER)   += $(call cc-option,-march=bdver1,-march=athlon)
++cflags-$(CONFIG_MPILEDRIVER)  += $(call cc-option,-march=bdver2,-march=athlon)
++cflags-$(CONFIG_MJAGUAR)      += $(call cc-option,-march=btver2,-march=athlon)
+ cflags-$(CONFIG_MCRUSOE)      += -march=i686 $(align)-functions=0 
$(align)-jumps=0 $(align)-loops=0
+ cflags-$(CONFIG_MEFFICEON)    += -march=i686 $(call tune,pentium3) 
$(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+ cflags-$(CONFIG_MWINCHIPC6)   += $(call 
cc-option,-march=winchip-c6,-march=i586)
+@@ -32,8 +40,14 @@ cflags-$(CONFIG_MCYRIXIII)  += $(call cc-
+ cflags-$(CONFIG_MVIAC3_2)     += $(call cc-option,-march=c3-2,-march=i686)
+ cflags-$(CONFIG_MVIAC7)               += -march=i686
+ cflags-$(CONFIG_MCORE2)               += -march=i686 $(call tune,core2)
+-cflags-$(CONFIG_MATOM)                += $(call cc-option,-march=atom,$(call 
cc-option,-march=core2,-march=i686)) \
+-      $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
++cflags-$(CONFIG_MNEHALEM)     += -march=i686 $(call tune,nehalem)
++cflags-$(CONFIG_MWESTMERE)    += -march=i686 $(call tune,westmere)
++cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge)
++cflags-$(CONFIG_MIVYBRIDGE)   += -march=i686 $(call tune,ivybridge)
++cflags-$(CONFIG_MHASWELL)     += -march=i686 $(call tune,haswell)
++cflags-$(CONFIG_MBROADWELL)   += -march=i686 $(call tune,broadwell)
++cflags-$(CONFIG_MATOM)                += $(call 
cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
++      $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
+ 
+ # AMD Elan support
+ cflags-$(CONFIG_MELAN)                += -march=i486

Reply via email to