FYI, we noticed the following changes on the master branch of https://git.kernel.org/pub/scm/linux/kernel/git/andrea/aa.git, introduced by commit 81c72584a480c5a4b7eede527d0b990c83c2dcc9 ("mm: gup: make get_user_pages_fast and __get_user_pages_fast latency conscious"):
========================================================================================= tbox_group/testcase/rootfs/kconfig/compiler/cpufreq_governor/test: ivb42/will-it-scale/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/performance/futex1 commit: 4ae904c494e475048050994f669137c12274da85 81c72584a480c5a4b7eede527d0b990c83c2dcc9 4ae904c494e47504 81c72584a480c5a4b7eede527d ---------------- -------------------------- %stddev %change %stddev \ | \ 5375911 ± 0% -4.3% 5146855 ± 0% will-it-scale.per_process_ops 1605249 ± 1% -3.1% 1555950 ± 0% will-it-scale.per_thread_ops 0.60 ± 1% -4.2% 0.58 ± 0% will-it-scale.scalability 9957 ± 27% -28.6% 7114 ± 0% numa-meminfo.node0.Mapped 1933 ± 17% +16.0% 2243 ± 6% numa-meminfo.node1.PageTables 2488 ± 27% -28.6% 1777 ± 0% numa-vmstat.node0.nr_mapped 483.00 ± 17% +16.0% 560.50 ± 6% numa-vmstat.node1.nr_page_table_pages 42.00 ± 12% -31.5% 28.75 ± 11% sched_debug.cfs_rq[0]:/.load 2032736 ± 5% -12.5% 1779371 ± 7% sched_debug.cfs_rq[0]:/.min_vruntime -300090 ±-69% -103.1% 9378 ±1396% sched_debug.cfs_rq[10]:/.spread0 -235906 ±-47% -103.2% 7486 ±1760% sched_debug.cfs_rq[11]:/.spread0 -885383 ±-11% -29.4% -625333 ±-21% sched_debug.cfs_rq[13]:/.spread0 -883477 ±-12% -28.4% -632137 ±-19% sched_debug.cfs_rq[14]:/.spread0 -881069 ±-12% -28.6% -629181 ±-20% sched_debug.cfs_rq[15]:/.spread0 -888493 ±-12% -29.9% -622785 ±-19% sched_debug.cfs_rq[16]:/.spread0 -883314 ±-13% -28.9% -627753 ±-20% sched_debug.cfs_rq[17]:/.spread0 -1037778 ±-20% -39.9% -623972 ±-21% sched_debug.cfs_rq[18]:/.spread0 -882564 ±-12% -29.3% -623573 ±-20% sched_debug.cfs_rq[19]:/.spread0 -237868 ±-46% -106.0% 14369 ±854% sched_debug.cfs_rq[1]:/.spread0 -870685 ±-11% -29.7% -612118 ±-18% sched_debug.cfs_rq[20]:/.spread0 -879689 ±-12% -29.5% -620241 ±-20% sched_debug.cfs_rq[21]:/.spread0 -872185 ±-13% -27.7% -630771 ±-21% sched_debug.cfs_rq[22]:/.spread0 -882721 ±-12% -28.3% -633288 ±-21% sched_debug.cfs_rq[23]:/.spread0 13.25 ± 47% +98.1% 26.25 ± 29% 
sched_debug.cfs_rq[24]:/.tg_load_avg_contrib -198518 ±-57% -127.2% 53978 ±241% sched_debug.cfs_rq[25]:/.spread0 15.00 ± 33% -53.3% 7.00 ± 0% sched_debug.cfs_rq[26]:/.load_avg -166551 ±-60% -135.2% 58649 ±214% sched_debug.cfs_rq[26]:/.spread0 15.25 ± 34% -54.1% 7.00 ± 0% sched_debug.cfs_rq[26]:/.tg_load_avg_contrib -195491 ±-57% -128.4% 55586 ±227% sched_debug.cfs_rq[27]:/.spread0 -189456 ±-56% -130.0% 56778 ±222% sched_debug.cfs_rq[28]:/.spread0 -198122 ±-56% -131.1% 61555 ±202% sched_debug.cfs_rq[29]:/.spread0 -267573 ±-52% -105.6% 14934 ±816% sched_debug.cfs_rq[2]:/.spread0 -196299 ±-56% -129.7% 58206 ±217% sched_debug.cfs_rq[30]:/.spread0 -188828 ±-53% -130.7% 57930 ±219% sched_debug.cfs_rq[31]:/.spread0 -197148 ±-54% -131.1% 61392 ±204% sched_debug.cfs_rq[32]:/.spread0 -191912 ±-55% -130.1% 57741 ±218% sched_debug.cfs_rq[33]:/.spread0 -196722 ±-57% -129.5% 58104 ±215% sched_debug.cfs_rq[35]:/.spread0 -802782 ±-14% -31.0% -554283 ±-22% sched_debug.cfs_rq[37]:/.spread0 183.25 ± 7% -7.9% 168.75 ± 0% sched_debug.cfs_rq[37]:/.util_avg -798974 ±-14% -31.3% -548870 ±-24% sched_debug.cfs_rq[38]:/.spread0 -804061 ±-13% -31.9% -547569 ±-23% sched_debug.cfs_rq[39]:/.spread0 -241212 ±-46% -104.2% 10110 ±1225% sched_debug.cfs_rq[3]:/.spread0 -804833 ±-13% -32.5% -542990 ±-24% sched_debug.cfs_rq[40]:/.spread0 -802162 ±-13% -31.6% -548407 ±-23% sched_debug.cfs_rq[41]:/.spread0 -804352 ±-13% -33.8% -532778 ±-26% sched_debug.cfs_rq[43]:/.spread0 -803450 ±-13% -31.6% -549859 ±-22% sched_debug.cfs_rq[44]:/.spread0 -804660 ±-13% -32.2% -545711 ±-22% sched_debug.cfs_rq[45]:/.spread0 -803171 ±-14% -32.8% -540079 ±-22% sched_debug.cfs_rq[46]:/.spread0 -798603 ±-14% -32.2% -541575 ±-23% sched_debug.cfs_rq[47]:/.spread0 -236187 ±-45% -106.5% 15418 ±808% sched_debug.cfs_rq[4]:/.spread0 -240043 ±-46% -105.8% 13821 ±907% sched_debug.cfs_rq[5]:/.spread0 -241134 ±-45% -105.5% 13348 ±932% sched_debug.cfs_rq[6]:/.spread0 -232614 ±-43% -104.6% 10696 ±1210% sched_debug.cfs_rq[7]:/.spread0 
-238112 ±-49% -104.9% 11721 ±1075% sched_debug.cfs_rq[8]:/.spread0 -239741 ±-47% -104.1% 9844 ±1305% sched_debug.cfs_rq[9]:/.spread0 42.00 ± 12% -31.5% 28.75 ± 11% sched_debug.cpu#0.load 2239 ± 9% +14.0% 2553 ± 11% sched_debug.cpu#0.sched_goidle 12835 ±102% -75.7% 3118 ± 24% sched_debug.cpu#12.ttwu_count 952259 ± 4% -10.0% 857091 ± 4% sched_debug.cpu#13.avg_idle 3427 ± 0% +19.0% 4078 ± 10% sched_debug.cpu#15.curr->pid 9061 ± 55% +132.5% 21068 ± 47% sched_debug.cpu#22.nr_switches 10463 ± 43% +118.0% 22806 ± 46% sched_debug.cpu#22.sched_count 1.00 ± 70% +75.0% 1.75 ± 93% sched_debug.cpu#28.nr_uninterruptible 228.25 ± 18% +22.0% 278.50 ± 11% sched_debug.cpu#29.sched_goidle 1880 ± 53% -62.9% 698.25 ± 21% sched_debug.cpu#31.nr_switches 2007 ± 50% -58.3% 837.50 ± 17% sched_debug.cpu#31.sched_count 422.50 ± 54% -42.1% 244.75 ± 28% sched_debug.cpu#31.sched_goidle 1014 ± 79% -66.5% 340.00 ± 43% sched_debug.cpu#31.ttwu_count 619.75 ± 70% -69.3% 190.50 ± 37% sched_debug.cpu#31.ttwu_local 2.00 ± 86% -50.0% 1.00 ± 70% sched_debug.cpu#34.nr_uninterruptible 0.50 ±300% +0.0% 0.50 ±100% sched_debug.cpu#35.nr_uninterruptible 1520 ± 12% +47.8% 2247 ± 41% sched_debug.cpu#40.curr->pid 5218 ± 20% -67.4% 1703 ± 15% sched_debug.cpu#41.ttwu_count 3739 ± 56% +101.7% 7542 ± 32% sched_debug.cpu#42.nr_switches 2.75 ± 30% -127.3% -0.75 ±-238% sched_debug.cpu#44.nr_uninterruptible 1870 ± 31% +167.9% 5011 ± 56% sched_debug.cpu#44.ttwu_count 1849 ± 27% -23.8% 1410 ± 0% sched_debug.cpu#46.curr->pid ========================================================================================= tbox_group/testcase/rootfs/kconfig/compiler/cpufreq_governor/test: lkp-xbm/will-it-scale/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/performance/futex2 commit: 4ae904c494e475048050994f669137c12274da85 81c72584a480c5a4b7eede527d0b990c83c2dcc9 4ae904c494e47504 81c72584a480c5a4b7eede527d ---------------- -------------------------- %stddev %change %stddev \ | \ 3024654 ± 0% -5.0% 2872390 ± 0% 
will-it-scale.per_process_ops 2475333 ± 0% -4.8% 2355651 ± 0% will-it-scale.per_thread_ops 7738 ± 15% +205.2% 23616 ± 41% cpuidle.C1E-NHM.time 1484 ± 8% -25.2% 1110 ± 9% sched_debug.cpu#2.curr->pid 1254 ± 12% -15.1% 1064 ± 1% slabinfo.kmalloc-512.active_objs 0.00 ± -1% +Inf% 1437029 ±134% latency_stats.avg.nfs_wait_on_request.nfs_updatepage.nfs_write_end.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.nfs_file_write.__vfs_write.vfs_write.SyS_write.entry_SYSCALL_64_fastpath 0.00 ± -1% +Inf% 1588478 ±120% latency_stats.max.nfs_wait_on_request.nfs_updatepage.nfs_write_end.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.nfs_file_write.__vfs_write.vfs_write.SyS_write.entry_SYSCALL_64_fastpath 0.00 ± -1% +Inf% 1699671 ±113% latency_stats.sum.nfs_wait_on_request.nfs_updatepage.nfs_write_end.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.nfs_file_write.__vfs_write.vfs_write.SyS_write.entry_SYSCALL_64_fastpath 0.66 ± 4% +47.1% 0.97 ± 6% perf-profile.cpu-cycles.___might_sleep.get_futex_key.futex_wait_setup.futex_wait.do_futex 0.00 ± -1% +Inf% 2.16 ± 3% perf-profile.cpu-cycles.___might_sleep.get_user_pages_fast.get_futex_key.futex_wait_setup.futex_wait 3.68 ± 5% -6.3% 3.45 ± 1% perf-profile.cpu-cycles._raw_spin_lock.futex_wait_setup.futex_wait.do_futex.sys_futex 1.29 ± 23% -25.7% 0.96 ± 4% perf-profile.cpu-cycles.get_futex_value_locked.futex_wait_setup.futex_wait.do_futex.sys_futex 21.11 ± 0% +13.5% 23.95 ± 0% perf-profile.cpu-cycles.get_user_pages_fast.get_futex_key.futex_wait_setup.futex_wait.do_futex 16.09 ± 1% -10.0% 14.48 ± 0% perf-profile.cpu-cycles.gup_pud_range.get_user_pages_fast.get_futex_key.futex_wait_setup.futex_wait ivb42: Ivytown Ivy Bridge-EP Memory: 64G lkp-xbm: Sandy Bridge Memory: 2G will-it-scale.per_process_ops 5.45e+06 ++---------------------------------------------------------------+ 5.4e+06 *+.*..*.*..*.. .*.. *.. .*.. *.. | | *.. .* .. .*..*.. .*. + *..* 5.35e+06 ++ *..*. * *.. 
.* *. * | 5.3e+06 ++ *. | | | 5.25e+06 ++ | 5.2e+06 ++ | 5.15e+06 ++ O O O | | O | 5.1e+06 ++ O O | 5.05e+06 ++ O O O O O O O O O O O | O O O O | 5e+06 ++ O O | 4.95e+06 ++---------------------------------------------------------------+ [*] bisect-good sample [O] bisect-bad sample To reproduce: git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git cd lkp-tests bin/lkp install job.yaml # job file is attached in this email bin/lkp run job.yaml Disclaimer: Results have been estimated based on internal Intel analysis and are provided for informational purposes only. Any difference in system hardware or software design or configuration may affect actual performance. Thanks, Ying Huang
--- LKP_SERVER: inn LKP_CGI_PORT: 80 LKP_CIFS_PORT: 139 testcase: will-it-scale default-monitors: wait: activate-monitor kmsg: uptime: iostat: vmstat: numa-numastat: numa-vmstat: numa-meminfo: proc-vmstat: proc-stat: interval: 10 meminfo: slabinfo: interrupts: lock_stat: latency_stats: softirqs: bdi_dev_mapping: diskstats: nfsstat: cpuidle: cpufreq-stats: turbostat: pmeter: sched_debug: interval: 60 cpufreq_governor: performance default-watchdogs: oom-killer: watchdog: commit: 2467d35aecc439fb2513b2c0bd5d9f84c4160b33 model: Ivytown Ivy Bridge-EP nr_cpu: 48 memory: 64G swap_partitions: LABEL=SWAP rootfs_partition: LABEL=LKP-ROOTFS category: benchmark perf-profile: freq: 800 will-it-scale: test: futex1 queue: cyclic testbox: ivb42 tbox_group: ivb42 kconfig: x86_64-rhel enqueue_time: 2015-10-02 17:16:06.962440531 +08:00 id: e51188816935e4c607ffadca6db032aba1223368 user: lkp compiler: gcc-4.9 head_commit: 2467d35aecc439fb2513b2c0bd5d9f84c4160b33 base_commit: 9ffecb10283508260936b96022d4ee43a7798b4c branch: linux-devel/devel-hourly-2015100216 kernel: "/pkg/linux/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/vmlinuz-4.3.0-rc3-wl-ath-05763-g2467d35" rootfs: debian-x86_64-2015-02-07.cgz result_root: "/result/will-it-scale/performance-futex1/ivb42/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/0" job_file: "/lkp/scheduled/ivb42/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-2467d35aecc439fb2513b2c0bd5d9f84c4160b33-20151002-32531-1jfkft4-0.yaml" dequeue_time: 2015-10-03 02:18:39.906705839 +08:00 max_uptime: 1500 initrd: "/osimage/debian/debian-x86_64-2015-02-07.cgz" bootloader_append: - root=/dev/ram0 - user=lkp - job=/lkp/scheduled/ivb42/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-2467d35aecc439fb2513b2c0bd5d9f84c4160b33-20151002-32531-1jfkft4-0.yaml - ARCH=x86_64 - kconfig=x86_64-rhel - branch=linux-devel/devel-hourly-2015100216 - commit=2467d35aecc439fb2513b2c0bd5d9f84c4160b33 - 
BOOT_IMAGE=/pkg/linux/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/vmlinuz-4.3.0-rc3-wl-ath-05763-g2467d35 - max_uptime=1500 - RESULT_ROOT=/result/will-it-scale/performance-futex1/ivb42/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/0 - LKP_SERVER=inn - |2- earlyprintk=ttyS0,115200 systemd.log_level=err debug apic=debug sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100 panic=-1 softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 prompt_ramdisk=0 console=ttyS0,115200 console=tty0 vga=normal rw lkp_initrd: "/lkp/lkp/lkp-x86_64.cgz" modules_initrd: "/pkg/linux/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/modules.cgz" bm_initrd: "/osimage/deps/debian-x86_64-2015-02-07.cgz/lkp.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/run-ipconfig.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/turbostat.cgz,/lkp/benchmarks/turbostat.cgz,/lkp/benchmarks/will-it-scale.cgz" job_state: finished loadavg: 41.86 18.81 7.34 1/493 9291 start_time: '1443809957' end_time: '1443810267' version: "/lkp/lkp/.src-20151001-230432"
# Reproduce script for the will-it-scale futex1 job.
#
# Step 1: select the "performance" cpufreq governor on every CPU.
# A glob is used instead of one hard-coded line per CPU so the script is
# not tied to a 48-CPU machine; on a 48-CPU box it expands to the same
# cpu0..cpu47 files, in the same lexicographic order, as the explicit list.
for governor in /sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_governor; do
	# An unmatched glob is left as the literal pattern, and some CPUs may
	# not expose a writable governor file; skip those rather than fail.
	if [ ! -w "$governor" ]; then
		echo "skipping $governor: not writable" >&2
		continue
	fi
	echo performance > "$governor"
done

# Step 2: run the futex1 test case with the arguments used for this report.
./runtest.py futex1 25 both 1 12 24 36 48