On Thu, Dec 15, 2016 at 11:55:01PM +0100, Peter Bex wrote: > Hi all, > > I've been playing around a little bit with "perf" and Valgrind's > cachegrind tool, and I noticed that the number of branch prediction > misses can be reduced if the argvector reusing check can be hardcoded > for cases where we know the size of the calling function's argvector. > > Here's a simple patch to make this happen (works on both the master > and chicken-5 branches).
And here's another one, that adds C_likely() and C_unlikely() macros, a la the Linux kernel's likely() and unlikely(). These are simple wrappers for __builtin_expect() which tell the compiler which branches in a conditional expression are likely to be taken. These are now used in generated code, when we do a C_demand() check to allocate memory. This is noticably faster in some benchmarks, and can bring down compilation time a bit as well. Attached also are benchmark results for unpatched CHICKEN 5, the previous patch which adds static argvector checks and results for both this patch and the static argvector. Question: Should this be: # define C_unlikely(x) __builtin_expect((x), 0) or, like Linux: # define C_unlikely(x) __builtin_expect(!!(x), 0) The latter seems to me like it would add more instructions due to the double negation, and the exact value of the expression in an if() is never important anyway, as long as it's zero or nonzero. Right? Cheers, Peter
From 5fe5848f5e4cbd8cff39866ce90d9e77a8c95c2c Mon Sep 17 00:00:00 2001 From: Peter Bex <pe...@more-magic.net> Date: Tue, 27 Dec 2016 17:53:40 +0100 Subject: [PATCH] Add __builtin_expect to help branch prediction. Currently applied to all C_demand() checks, which are unlikely to fail. So we make sure the C compiler knows the default branch is more likely. --- c-backend.scm | 10 +++++----- chicken.h | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/c-backend.scm b/c-backend.scm index 5431268..ca94014 100644 --- a/c-backend.scm +++ b/c-backend.scm @@ -855,10 +855,10 @@ (when target-stack-size (gen #t "C_resize_stack(" target-stack-size ");") ) ) (gen #t "C_check_nursery_minimum(C_calculate_demand(" demand ",c," max-av "));" - #t "if(!C_demand(C_calculate_demand(" demand ",c," max-av "))){" + #t "if(C_unlikely(!C_demand(C_calculate_demand(" demand ",c," max-av ")))){" #t "C_save_and_reclaim((void*)C_" topname ",c,av);}" #t "toplevel_initialized=1;" - #t "if(!C_demand_2(" ldemand ")){" + #t "if(C_unlikely(!C_demand_2(" ldemand "))){" #t "C_save(t1);" #t "C_rereclaim2(" ldemand "*sizeof(C_word),1);" #t "t1=C_restore;}" @@ -873,7 +873,7 @@ (when (and (not unsafe) (not no-argc-checks) (> n 2) (not empty-closure)) (gen #t "if(c<" n ") C_bad_min_argc_2(c," n ",t0);") ) (when insert-timer-checks (gen #t "C_check_for_interrupt;")) - (gen #t "if(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av "))){")) + (gen #t "if(C_unlikely(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av ")))){")) (else (unless direct (gen #t "C_word *a;")) (when (and direct (not unsafe) (not disable-stack-overflow-checking)) @@ -888,10 +888,10 @@ ;; The interrupt handler may fill the stack, so we only ;; check for an interrupt when the procedure is restartable (when insert-timer-checks (gen #t "C_check_for_interrupt;")) - (gen #t "if(!C_demand(C_calculate_demand(" + (gen #t "if(C_unlikely(!C_demand(C_calculate_demand(" demand (if customizable ",0," ",c,") - max-av "))){")) + max-av ")))){")) (else (gen #\{))))) (cond ((and (not (eq? 'toplevel id)) (not direct)) diff --git a/chicken.h b/chicken.h index 0f619aa..76661a7 100644 --- a/chicken.h +++ b/chicken.h @@ -222,6 +222,8 @@ void *alloca (); #endif #if defined(__GNUC__) || defined(__INTEL_COMPILER) +# define C_unlikely(x) __builtin_expect((x), 0) +# define C_likely(x) __builtin_expect((x), 1) # ifndef __cplusplus # define C_cblock ({ # define C_cblockend }) @@ -236,6 +238,9 @@ void *alloca (); # if defined(__i386__) && !defined(__clang__) # define C_regparm __attribute__ ((regparm(3))) # endif +#else +# define C_unlikely(x) (x) +# define C_likely(x) (x) #endif #ifndef C_cblock -- 2.1.4
From c1b57126dd01f103e418a5446886e739363c8e39 Mon Sep 17 00:00:00 2001 From: Peter Bex <pe...@more-magic.net> Date: Tue, 27 Dec 2016 18:06:12 +0100 Subject: [PATCH] Add __builtin_expect to help branch prediction. Currently applied to all C_demand() checks, which are unlikely to fail. So we make sure the C compiler knows the default branch is more likely. --- c-backend.scm | 10 +++++----- chicken.h | 7 +++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/c-backend.scm b/c-backend.scm index a6205db..a5e0f5d 100644 --- a/c-backend.scm +++ b/c-backend.scm @@ -827,10 +827,10 @@ (when target-stack-size (gen #t "C_resize_stack(" target-stack-size ");") ) ) (gen #t "C_check_nursery_minimum(C_calculate_demand(" demand ",c," max-av "));" - #t "if(!C_demand(C_calculate_demand(" demand ",c," max-av "))){" + #t "if(C_unlikely(!C_demand(C_calculate_demand(" demand ",c," max-av ")))){" #t "C_save_and_reclaim((void*)C_" topname ",c,av);}" #t "toplevel_initialized=1;" - #t "if(!C_demand_2(" ldemand ")){" + #t "if(C_unlikely(!C_demand_2(" ldemand "))){" #t "C_save(t1);" #t "C_rereclaim2(" ldemand "*sizeof(C_word),1);" #t "t1=C_restore;}" @@ -845,7 +845,7 @@ (when (and (not unsafe) (not no-argc-checks) (> n 2) (not empty-closure)) (gen #t "if(c<" n ") C_bad_min_argc_2(c," n ",t0);") ) (when insert-timer-checks (gen #t "C_check_for_interrupt;")) - (gen #t "if(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av "))){")) + (gen #t "if(C_unlikely(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av ")))){")) (else (unless direct (gen #t "C_word *a;")) (when (and direct (not unsafe) (not disable-stack-overflow-checking)) @@ -860,10 +860,10 @@ ;; The interrupt handler may fill the stack, so we only ;; check for an interrupt when the procedure is restartable (when insert-timer-checks (gen #t "C_check_for_interrupt;")) - (gen #t "if(!C_demand(C_calculate_demand(" + (gen #t "if(C_unlikely(!C_demand(C_calculate_demand(" demand (if customizable ",0," ",c,") - max-av "))){")) + max-av ")))){")) (else (gen #\{))))) (cond ((and (not (eq? 'toplevel id)) (not direct)) diff --git a/chicken.h b/chicken.h index 733958d..4ae93e0 100644 --- a/chicken.h +++ b/chicken.h @@ -245,6 +245,8 @@ void *alloca (); #endif #if defined(__GNUC__) || defined(__INTEL_COMPILER) +# define C_unlikely(x) __builtin_expect((x), 0) +# define C_likely(x) __builtin_expect((x), 1) # ifndef __cplusplus # define C_cblock ({ # define C_cblockend }) @@ -261,6 +263,11 @@ void *alloca (); # endif #elif defined(__WATCOMC__) # define C_ccall __cdecl +# define C_unlikely(x) (x) +# define C_likely(x) (x) +#else +# define C_unlikely(x) (x) +# define C_likely(x) (x) #endif #ifndef C_cblock -- 2.1.4
+---[1]: |-> installation-prefix: /home/sjamaan/chickens/plain-chicken-5 |-> csc-options: |-> repetitions: 10 +---[2]: |-> installation-prefix: /home/sjamaan/chickens/chicken-5-with-static-argv |-> csc-options: |-> repetitions: 10 +---[3]: |-> installation-prefix: /home/sjamaan/chickens/chicken-5-with-static-argv-and-unlikely-demand |-> csc-options: |-> repetitions: 10 Displaying normalized results (larger numbers indicate better results) === === cpu-time === Programs [1] [2] [3] ================================================== 0_________________________1.00______1.00______1.00 binarytrees_______________1.00______1.18______1.16 boyer_____________________1.05______1.00______1.09 browse____________________1.08______1.00______1.00 conform___________________1.00______1.05______1.09 cpstak____________________1.08______1.02______1.00 ctak______________________1.03______1.00______1.00 dderiv____________________1.18______1.00______1.15 deriv_____________________1.07______1.08______1.00 destructive_______________1.03______1.00______1.00 dfa_______________________1.00______1.20______1.18 div-iter__________________1.05______1.00______1.05 div-rec___________________1.09______1.24______1.00 dynamic___________________1.00______1.42______1.27 earley____________________1.02______1.12______1.00 fannkuch__________________1.00______1.13______1.11 fft_______________________1.00______1.31______1.26 fib_______________________1.00______1.45______1.47 fibc______________________1.00______1.22______1.24 fibfp_____________________1.00______1.44______1.47 fprint____________________1.00______1.37______1.35 fread_____________________1.00______1.24______1.22 gcbench___________________1.00______1.15______1.14 gold______________________1.00______1.22______1.26 gold2_____________________1.00______1.28______1.30 graphs____________________1.00______1.17______1.20 hanoi_____________________1.00______1.39______1.39 integ_____________________1.00______1.00______1.16 integ2____________________1.04______1.00______1.14 kanren____________________1.00______1.13______1.29 kernwyk-ackermann_________1.00______1.00______1.15 kernwyk-array_____________1.00______1.03______1.13 kernwyk-cat_______________1.00______1.12______1.50 kernwyk-string____________1.00______1.05______1.28 kernwyk-sum_______________1.14______1.00______1.30 kernwyk-tail______________1.08______1.00______1.27 kernwyk-wc________________1.00______1.03______1.26 knucleotide_______________1.00______1.02______1.33 lattice___________________1.02______1.00______1.20 matrix____________________1.07______1.00______1.16 maze______________________1.22______1.00______1.38 mazefun___________________1.12______1.00______1.16 mbrot_____________________1.00______1.07______1.30 nbody_____________________1.18______1.00______1.15 nboyer____________________1.11______1.00______1.12 nestedloop________________1.15______1.00______1.16 nfa_______________________1.14______1.00______1.14 nqueens___________________1.07______1.00______1.03 ntakl_____________________1.00______1.06______1.03 nucleic2__________________1.00______1.02______1.02 paraffins_________________1.00______1.00______1.00 parsing___________________1.00______1.00______1.00 pnpoly____________________1.03______1.00______1.02 primes____________________1.02______1.00______1.00 psyntax___________________1.00______1.00______1.01 puzzle____________________1.00______1.05______1.21 ray_______________________1.00______1.01______1.00 ray2______________________1.01______1.00______1.00 sboyer____________________1.03______1.00______1.00 scheme____________________1.32______1.25______1.00 sieves-eratosthenes_______1.00______1.00______1.00 simplex___________________1.00______1.00______1.01 slatex____________________1.06______1.00______1.06 sort1_____________________1.00______1.00______1.01 tak_______________________1.00______1.04______1.03 takl______________________1.10______1.00______1.00 takr______________________1.00______1.00______1.01 traverse__________________1.00______1.00______1.00 travinit__________________1.00______1.03______1.08 triangl___________________1.04______1.00______1.02
signature.asc
Description: Digital signature
_______________________________________________ Chicken-hackers mailing list Chicken-hackers@nongnu.org https://lists.nongnu.org/mailman/listinfo/chicken-hackers