FYI, we noticed the performance changes reported below on the master branch of git://git.kernel.org/pub/scm/linux/kernel/git/andrea/aa.git, at commit 112b650f83e5ccea260708f8b7ca747580584659 ("mm: gup: make get_user_pages_fast and __get_user_pages_fast latency conscious")
========================================================================================= tbox_group/testcase/rootfs/kconfig/compiler/cpufreq_governor/test: lkp-sbx04/will-it-scale/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/performance/futex1 commit: b7c3d6a0d545317e63ff58a1e60059ce79ac359e 112b650f83e5ccea260708f8b7ca747580584659 b7c3d6a0d545317e 112b650f83e5ccea260708f8b7 ---------------- -------------------------- %stddev %change %stddev \ | \ 5155618 ± 0% -6.8% 4806190 ± 0% will-it-scale.per_process_ops 1203105 ± 1% -3.0% 1166790 ± 0% will-it-scale.per_thread_ops 18828 ± 11% +97.6% 37211 ± 5% will-it-scale.time.involuntary_context_switches 80265 ± 8% -20.5% 63818 ± 11% numa-numastat.node0.numa_hit 18828 ± 11% +97.6% 37211 ± 5% time.involuntary_context_switches 3600 ± 3% +20.6% 4341 ± 5% vmstat.system.cs 227062 ± 3% +52.6% 346589 ± 1% latency_stats.hits.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.entry_SYSCALL_64_fastpath 5878405 ± 0% +19.7% 7035430 ± 0% latency_stats.sum.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.entry_SYSCALL_64_fastpath 7597 ± 12% -36.0% 4860 ± 32% numa-meminfo.node0.AnonPages 5012 ± 50% +59.1% 7975 ± 31% numa-meminfo.node2.Mapped 12346 ± 14% +24.2% 15338 ± 3% numa-meminfo.node3.SReclaimable 3516 ± 3% +10.0% 3867 ± 3% slabinfo.kmalloc-2048.active_objs 20418 ± 3% -10.9% 18198 ± 5% slabinfo.kmalloc-256.active_objs 344.25 ± 6% -25.9% 255.00 ± 14% slabinfo.kmem_cache.active_objs 344.25 ± 6% -25.9% 255.00 ± 14% slabinfo.kmem_cache.num_objs 683.00 ± 4% -16.4% 571.00 ± 7% slabinfo.kmem_cache_node.active_objs 752.00 ± 3% -14.9% 640.00 ± 7% slabinfo.kmem_cache_node.num_objs 1899 ± 12% -36.0% 1214 ± 32% numa-vmstat.node0.nr_anon_pages 113103 ± 8% -11.7% 99882 ± 4% numa-vmstat.node0.numa_hit 78508 ± 13% -16.9% 65214 ± 8% numa-vmstat.node0.numa_local 95.75 ± 24% -56.7% 41.50 ± 41% numa-vmstat.node2.nr_dirtied 1252 ± 50% +59.1% 1993 ± 31% numa-vmstat.node2.nr_mapped 93.25 ± 24% -57.1% 40.00 ± 44% numa-vmstat.node2.nr_written 3086 ± 14% 
+24.2% 3834 ± 3% numa-vmstat.node3.nr_slab_reclaimable 2.26 ± 2% -36.9% 1.42 ± 2% perf-profile.cpu-cycles.___might_sleep.__might_sleep.get_futex_key.futex_wake.do_futex 0.41 ± 5% +140.2% 0.98 ± 1% perf-profile.cpu-cycles.___might_sleep.get_futex_key.futex_wake.do_futex.sys_futex 0.00 ± -1% +Inf% 3.65 ± 1% perf-profile.cpu-cycles.___might_sleep.get_user_pages_fast.get_futex_key.futex_wake.do_futex 3.21 ± 2% -31.6% 2.20 ± 1% perf-profile.cpu-cycles.__might_sleep.get_futex_key.futex_wake.do_futex.sys_futex 4.42 ± 1% -11.8% 3.90 ± 1% perf-profile.cpu-cycles.get_futex_key_refs.isra.10.futex_wake.do_futex.sys_futex.entry_SYSCALL_64_fastpath 26.58 ± 0% +18.8% 31.59 ± 0% perf-profile.cpu-cycles.get_user_pages_fast.get_futex_key.futex_wake.do_futex.sys_futex 14.18 ± 1% -9.6% 12.81 ± 0% perf-profile.cpu-cycles.unlock_page.get_futex_key.futex_wake.do_futex.sys_futex 193.00 ± 55% -60.1% 77.00 ± 87% sched_debug.cfs_rq[10]:/.blocked_load_avg 209.25 ± 51% -73.2% 56.00 ±107% sched_debug.cfs_rq[10]:/.tg_load_contrib 0.50 ±100% +350.0% 2.25 ± 72% sched_debug.cfs_rq[14]:/.nr_spread_over 65.00 ±154% -96.2% 2.50 ±100% sched_debug.cfs_rq[15]:/.blocked_load_avg 81.00 ±124% -79.9% 16.25 ± 13% sched_debug.cfs_rq[15]:/.tg_load_contrib 217.25 ± 96% -61.4% 83.75 ±167% sched_debug.cfs_rq[17]:/.blocked_load_avg 219.75 ± 95% -60.8% 86.25 ±161% sched_debug.cfs_rq[17]:/.tg_load_contrib 9485 ± 11% +16.1% 11016 ± 6% sched_debug.cfs_rq[19]:/.tg_load_avg 9453 ± 12% +16.5% 11015 ± 6% sched_debug.cfs_rq[20]:/.tg_load_avg 8076 ± 0% +13.7% 9178 ± 11% sched_debug.cfs_rq[21]:/.avg->runnable_avg_sum 9442 ± 11% +16.6% 11010 ± 6% sched_debug.cfs_rq[21]:/.tg_load_avg 175.00 ± 0% +13.4% 198.50 ± 11% sched_debug.cfs_rq[21]:/.tg_runnable_contrib 9432 ± 12% +16.7% 11010 ± 6% sched_debug.cfs_rq[22]:/.tg_load_avg 9473 ± 12% +15.9% 10977 ± 6% sched_debug.cfs_rq[23]:/.tg_load_avg 175.00 ± 2% +7.7% 188.50 ± 5% sched_debug.cfs_rq[27]:/.tg_runnable_contrib 35551 ± 12% -10.3% 31893 ± 1% sched_debug.cfs_rq[50]:/.exec_clock 
21745 ± 6% +8.6% 23612 ± 6% sched_debug.cfs_rq[56]:/.exec_clock 73.75 ±100% +526.4% 462.00 ± 54% sched_debug.cfs_rq[63]:/.blocked_load_avg 84.25 ± 88% +460.8% 472.50 ± 52% sched_debug.cfs_rq[63]:/.tg_load_contrib 0.50 ±100% +550.0% 3.25 ± 25% sched_debug.cfs_rq[9]:/.nr_spread_over 10053 ± 27% +101.2% 20226 ± 14% sched_debug.cpu#0.nr_switches 19037 ± 14% +54.0% 29326 ± 10% sched_debug.cpu#0.sched_count 2136 ± 12% +236.9% 7198 ± 32% sched_debug.cpu#0.sched_goidle 15374 ± 36% -57.4% 6548 ± 67% sched_debug.cpu#1.nr_switches 15861 ± 35% -54.4% 7236 ± 60% sched_debug.cpu#1.sched_count 6837 ± 44% -55.2% 3060 ± 71% sched_debug.cpu#1.sched_goidle 2.00 ± 0% +50.0% 3.00 ± 0% sched_debug.cpu#17.cpu_load[2] 3269 ± 52% +182.5% 9233 ± 65% sched_debug.cpu#19.ttwu_count 1684 ± 13% -29.3% 1191 ± 15% sched_debug.cpu#23.ttwu_local 1275 ± 20% +203.1% 3865 ± 58% sched_debug.cpu#24.ttwu_local -6.25 ±-54% -100.0% 0.00 ± 0% sched_debug.cpu#25.nr_uninterruptible 1.50 ±233% -650.0% -8.25 ±-97% sched_debug.cpu#30.nr_uninterruptible -1.00 ±-187% -325.0% 2.25 ± 79% sched_debug.cpu#32.nr_uninterruptible 3.50 ± 47% -114.3% -0.50 ±-331% sched_debug.cpu#34.nr_uninterruptible 3.25 ± 39% -76.9% 0.75 ±145% sched_debug.cpu#37.nr_uninterruptible 236.50 ± 25% +597.6% 1649 ±131% sched_debug.cpu#39.sched_goidle 1.50 ±137% +316.7% 6.25 ± 45% sched_debug.cpu#41.nr_uninterruptible 984.50 ± 80% -63.9% 355.75 ± 40% sched_debug.cpu#42.ttwu_local 241.00 ± 7% +102.3% 487.50 ± 25% sched_debug.cpu#44.ttwu_local 522.50 ± 6% +385.2% 2535 ± 91% sched_debug.cpu#45.sched_goidle 1481 ± 17% +972.4% 15890 ±117% sched_debug.cpu#47.nr_switches 1566 ± 16% +925.2% 16056 ±117% sched_debug.cpu#47.sched_count 618.00 ± 19% +1162.9% 7804 ±119% sched_debug.cpu#47.sched_goidle 4222 ± 70% -61.6% 1619 ± 9% sched_debug.cpu#5.nr_switches 4705 ± 63% -52.6% 2229 ± 14% sched_debug.cpu#5.sched_count 1971 ± 76% -65.4% 682.00 ± 9% sched_debug.cpu#5.sched_goidle 1835 ± 36% +215.7% 5792 ± 36% sched_debug.cpu#53.nr_switches 1961 ± 37% +197.9% 5843 
± 36% sched_debug.cpu#53.sched_count 396.00 ± 19% +402.3% 1989 ± 71% sched_debug.cpu#53.ttwu_local 1803 ± 13% -32.6% 1215 ± 10% sched_debug.cpu#6.nr_switches 766.25 ± 13% -34.5% 502.25 ± 10% sched_debug.cpu#6.sched_goidle 395.50 ± 10% -52.3% 188.50 ± 15% sched_debug.cpu#6.ttwu_local 1105 ± 33% +788.4% 9823 ±107% sched_debug.cpu#60.sched_goidle lkp-sbx04: Sandy Bridge-EX Memory: 64G will-it-scale.per_process_ops 5.2e+06 ++------------------*--------------------------------------------+ *.. .*..*..*. .*..*..*...* | 5.15e+06 ++ *...*. *..*..*...*. | 5.1e+06 ++ | | | 5.05e+06 ++ | 5e+06 ++ | | | 4.95e+06 ++ | 4.9e+06 ++ | | O O | 4.85e+06 ++ | 4.8e+06 ++ O O O O O O O O | O O O O O O O O O O 4.75e+06 ++-O-------------------------------------------------------------+ [*] bisect-good sample [O] bisect-bad sample To reproduce: git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git cd lkp-tests bin/lkp install job.yaml # job file is attached in this email bin/lkp run job.yaml Disclaimer: Results have been estimated based on internal Intel analysis and are provided for informational purposes only. Any difference in system hardware or software design or configuration may affect actual performance. Thanks, Ying Huang
--- LKP_SERVER: inn LKP_CGI_PORT: 80 LKP_CIFS_PORT: 139 testcase: will-it-scale default-monitors: wait: activate-monitor kmsg: uptime: iostat: vmstat: numa-numastat: numa-vmstat: numa-meminfo: proc-vmstat: proc-stat: interval: 10 meminfo: slabinfo: interrupts: lock_stat: latency_stats: softirqs: bdi_dev_mapping: diskstats: nfsstat: cpuidle: cpufreq-stats: turbostat: pmeter: sched_debug: interval: 60 cpufreq_governor: performance default-watchdogs: oom-killer: watchdog: commit: 752f49cf691de1a914be41c78111a0877af986ba model: Sandy Bridge-EX nr_cpu: 64 memory: 64G nr_ssd_partitions: 4 ssd_partitions: "/dev/disk/by-id/ata-INTEL_SSDSC2CW240A3_CVCV20430*-part1" swap_partitions: category: benchmark perf-profile: freq: 800 will-it-scale: test: futex1 queue: cyclic testbox: lkp-sbx04 tbox_group: lkp-sbx04 kconfig: x86_64-rhel enqueue_time: 2015-07-27 11:24:32.572062861 +08:00 user: lkp compiler: gcc-4.9 head_commit: 752f49cf691de1a914be41c78111a0877af986ba base_commit: cbfe8fa6cd672011c755c3cd85c9ffd4e2d10a6f branch: linux-devel/devel-hourly-2015072717 kernel: "/pkg/linux/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/vmlinuz-4.2.0-rc4-wl-ath-02023-g752f49c" rootfs: debian-x86_64-2015-02-07.cgz result_root: "/result/will-it-scale/performance-futex1/lkp-sbx04/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/0" job_file: "/lkp/scheduled/lkp-sbx04/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-752f49cf691de1a914be41c78111a0877af986ba-20150727-107044-1mg92jq-0.yaml" dequeue_time: 2015-07-27 18:08:44.809469793 +08:00 max_uptime: 1500 initrd: "/osimage/debian/debian-x86_64-2015-02-07.cgz" bootloader_append: - root=/dev/ram0 - user=lkp - job=/lkp/scheduled/lkp-sbx04/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-752f49cf691de1a914be41c78111a0877af986ba-20150727-107044-1mg92jq-0.yaml - ARCH=x86_64 - kconfig=x86_64-rhel - branch=linux-devel/devel-hourly-2015072717 - 
commit=752f49cf691de1a914be41c78111a0877af986ba - BOOT_IMAGE=/pkg/linux/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/vmlinuz-4.2.0-rc4-wl-ath-02023-g752f49c - max_uptime=1500 - RESULT_ROOT=/result/will-it-scale/performance-futex1/lkp-sbx04/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/0 - LKP_SERVER=inn - |2- earlyprintk=ttyS0,115200 systemd.log_level=err debug apic=debug sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100 panic=-1 softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 prompt_ramdisk=0 console=ttyS0,115200 console=tty0 vga=normal rw lkp_initrd: "/lkp/lkp/lkp-x86_64.cgz" modules_initrd: "/pkg/linux/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/modules.cgz" bm_initrd: "/osimage/deps/debian-x86_64-2015-02-07.cgz/lkp.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/run-ipconfig.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/turbostat.cgz,/lkp/benchmarks/turbostat.cgz,/lkp/benchmarks/will-it-scale.cgz" job_state: finished loadavg: 46.70 20.74 8.06 1/624 11372 start_time: '1437991880' end_time: '1437992189' version: "/lkp/lkp/.src-20150727-150448"
# Reproduce script: pin the cpufreq scaling governor of every CPU to
# "performance", then launch the will-it-scale futex1 benchmark.
#
# Shell pathname expansion sorts matches lexicographically
# (cpu0, cpu1, cpu10, cpu11, ..., cpu19, cpu2, cpu20, ...), which is the
# same order as the original unrolled per-CPU command sequence, so the
# same value is written to the same files in the same order.
for governor_file in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
do
	echo performance > "$governor_file"
done
./runtest.py futex1 16 both 1 8 16 24 32 48 64