On Thu, Dec 15, 2016 at 11:55:01PM +0100, Peter Bex wrote:
> Hi all,
> 
> I've been playing around a little bit with "perf" and Valgrind's
> cachegrind tool, and I noticed that the number of branch prediction
> misses can be reduced if the argvector reusing check can be hardcoded
> for cases where we know the size of the calling function's argvector.
> 
> Here's a simple patch to make this happen (works on both the master
> and chicken-5 branches).

And here's another one, that adds C_likely() and C_unlikely() macros,
a la the Linux kernel's likely() and unlikely().  These are simple
wrappers for __builtin_expect() which tell the compiler which branches
in a conditional expression are likely to be taken.

These are now used in generated code, when we do a C_demand() check to
allocate memory.  This is noticably faster in some benchmarks, and
can bring down compilation time a bit as well.  Attached also are
benchmark results for unpatched CHICKEN 5, the previous patch which
adds static argvector checks and results for both this patch and the
static argvector.

Question: Should this be:

# define C_unlikely(x)             __builtin_expect((x), 0)

or, like Linux:

# define C_unlikely(x)             __builtin_expect(!!(x), 0)

The latter seems to me like it would add more instructions due to
the double negation, and the exact value of the expression in an if()
is never important anyway, as long as it's zero or nonzero.  Right?

Cheers,
Peter
From 5fe5848f5e4cbd8cff39866ce90d9e77a8c95c2c Mon Sep 17 00:00:00 2001
From: Peter Bex <pe...@more-magic.net>
Date: Tue, 27 Dec 2016 17:53:40 +0100
Subject: [PATCH] Add __builtin_expect to help branch prediction.

Currently applied to all C_demand() checks, which are unlikely to fail.
So we make sure the C compiler knows the default branch is more likely.
---
 c-backend.scm | 10 +++++-----
 chicken.h     |  5 +++++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/c-backend.scm b/c-backend.scm
index 5431268..ca94014 100644
--- a/c-backend.scm
+++ b/c-backend.scm
@@ -855,10 +855,10 @@
 		      (when target-stack-size
 			(gen #t "C_resize_stack(" target-stack-size ");") ) )
 		    (gen #t "C_check_nursery_minimum(C_calculate_demand(" demand ",c," max-av "));"
-			 #t "if(!C_demand(C_calculate_demand(" demand ",c," max-av "))){"
+			 #t "if(C_unlikely(!C_demand(C_calculate_demand(" demand ",c," max-av ")))){"
 			 #t "C_save_and_reclaim((void*)C_" topname ",c,av);}"
 			 #t "toplevel_initialized=1;"
-			 #t "if(!C_demand_2(" ldemand ")){"
+			 #t "if(C_unlikely(!C_demand_2(" ldemand "))){"
 			 #t "C_save(t1);"
 			 #t "C_rereclaim2(" ldemand "*sizeof(C_word),1);"
 			 #t "t1=C_restore;}"
@@ -873,7 +873,7 @@
 		  (when (and (not unsafe) (not no-argc-checks) (> n 2) (not empty-closure))
 		    (gen #t "if(c<" n ") C_bad_min_argc_2(c," n ",t0);") )
 		  (when insert-timer-checks (gen #t "C_check_for_interrupt;"))
-		  (gen #t "if(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av "))){"))
+		  (gen #t "if(C_unlikely(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av ")))){"))
 		 (else
 		  (unless direct (gen #t "C_word *a;"))
 		  (when (and direct (not unsafe) (not disable-stack-overflow-checking))
@@ -888,10 +888,10 @@
 			 ;; The interrupt handler may fill the stack, so we only
 			 ;; check for an interrupt when the procedure is restartable
 			 (when insert-timer-checks (gen #t "C_check_for_interrupt;"))
-			 (gen #t "if(!C_demand(C_calculate_demand("
+			 (gen #t "if(C_unlikely(!C_demand(C_calculate_demand("
 			      demand
 			      (if customizable ",0," ",c,")
-			      max-av "))){"))
+			      max-av ")))){"))
 			(else
 			 (gen #\{)))))
 	   (cond ((and (not (eq? 'toplevel id)) (not direct))
diff --git a/chicken.h b/chicken.h
index 0f619aa..76661a7 100644
--- a/chicken.h
+++ b/chicken.h
@@ -222,6 +222,8 @@ void *alloca ();
 #endif
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
+# define C_unlikely(x)             __builtin_expect((x), 0)
+# define C_likely(x)               __builtin_expect((x), 1)
 # ifndef __cplusplus
 #  define C_cblock                ({
 #  define C_cblockend             })
@@ -236,6 +238,9 @@ void *alloca ();
 # if defined(__i386__) && !defined(__clang__)
 #  define C_regparm               __attribute__ ((regparm(3)))
 # endif
+#else
+# define C_unlikely(x)             (x)
+# define C_likely(x)               (x)
 #endif
 
 #ifndef C_cblock
-- 
2.1.4

From c1b57126dd01f103e418a5446886e739363c8e39 Mon Sep 17 00:00:00 2001
From: Peter Bex <pe...@more-magic.net>
Date: Tue, 27 Dec 2016 18:06:12 +0100
Subject: [PATCH] Add __builtin_expect to help branch prediction.

Currently applied to all C_demand() checks, which are unlikely to fail.
So we make sure the C compiler knows the default branch is more likely.
---
 c-backend.scm | 10 +++++-----
 chicken.h     |  7 +++++++
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/c-backend.scm b/c-backend.scm
index a6205db..a5e0f5d 100644
--- a/c-backend.scm
+++ b/c-backend.scm
@@ -827,10 +827,10 @@
 		      (when target-stack-size
 			(gen #t "C_resize_stack(" target-stack-size ");") ) )
 		    (gen #t "C_check_nursery_minimum(C_calculate_demand(" demand ",c," max-av "));"
-			 #t "if(!C_demand(C_calculate_demand(" demand ",c," max-av "))){"
+			 #t "if(C_unlikely(!C_demand(C_calculate_demand(" demand ",c," max-av ")))){"
 			 #t "C_save_and_reclaim((void*)C_" topname ",c,av);}"
 			 #t "toplevel_initialized=1;"
-			 #t "if(!C_demand_2(" ldemand ")){"
+			 #t "if(C_unlikely(!C_demand_2(" ldemand "))){"
 			 #t "C_save(t1);"
 			 #t "C_rereclaim2(" ldemand "*sizeof(C_word),1);"
 			 #t "t1=C_restore;}"
@@ -845,7 +845,7 @@
 		  (when (and (not unsafe) (not no-argc-checks) (> n 2) (not empty-closure))
 		    (gen #t "if(c<" n ") C_bad_min_argc_2(c," n ",t0);") )
 		  (when insert-timer-checks (gen #t "C_check_for_interrupt;"))
-		  (gen #t "if(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av "))){"))
+		  (gen #t "if(C_unlikely(!C_demand(C_calculate_demand((c-" n ")*C_SIZEOF_PAIR +" demand ",c," max-av ")))){"))
 		 (else
 		  (unless direct (gen #t "C_word *a;"))
 		  (when (and direct (not unsafe) (not disable-stack-overflow-checking))
@@ -860,10 +860,10 @@
 			 ;; The interrupt handler may fill the stack, so we only
 			 ;; check for an interrupt when the procedure is restartable
 			 (when insert-timer-checks (gen #t "C_check_for_interrupt;"))
-			 (gen #t "if(!C_demand(C_calculate_demand("
+			 (gen #t "if(C_unlikely(!C_demand(C_calculate_demand("
 			      demand
 			      (if customizable ",0," ",c,")
-			      max-av "))){"))
+			      max-av ")))){"))
 			(else
 			 (gen #\{)))))
 	   (cond ((and (not (eq? 'toplevel id)) (not direct))
diff --git a/chicken.h b/chicken.h
index 733958d..4ae93e0 100644
--- a/chicken.h
+++ b/chicken.h
@@ -245,6 +245,8 @@ void *alloca ();
 #endif
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
+# define C_unlikely(x)             __builtin_expect((x), 0)
+# define C_likely(x)               __builtin_expect((x), 1)
 # ifndef __cplusplus
 #  define C_cblock                ({
 #  define C_cblockend             })
@@ -261,6 +263,11 @@ void *alloca ();
 # endif
 #elif defined(__WATCOMC__)
 # define C_ccall                  __cdecl
+# define C_unlikely(x)             (x)
+# define C_likely(x)               (x)
+#else
+# define C_unlikely(x)             (x)
+# define C_likely(x)               (x)
 #endif
 
 #ifndef C_cblock
-- 
2.1.4

+---[1]:
|-> installation-prefix: /home/sjamaan/chickens/plain-chicken-5
|-> csc-options: 
|-> repetitions: 10

+---[2]:
|-> installation-prefix: /home/sjamaan/chickens/chicken-5-with-static-argv
|-> csc-options: 
|-> repetitions: 10

+---[3]:
|-> installation-prefix: 
/home/sjamaan/chickens/chicken-5-with-static-argv-and-unlikely-demand
|-> csc-options: 
|-> repetitions: 10

Displaying normalized results (larger numbers indicate better results)

===
=== cpu-time
===

Programs                   [1]       [2]       [3]
==================================================
0_________________________1.00______1.00______1.00
binarytrees_______________1.00______1.18______1.16
boyer_____________________1.05______1.00______1.09
browse____________________1.08______1.00______1.00
conform___________________1.00______1.05______1.09
cpstak____________________1.08______1.02______1.00
ctak______________________1.03______1.00______1.00
dderiv____________________1.18______1.00______1.15
deriv_____________________1.07______1.08______1.00
destructive_______________1.03______1.00______1.00
dfa_______________________1.00______1.20______1.18
div-iter__________________1.05______1.00______1.05
div-rec___________________1.09______1.24______1.00
dynamic___________________1.00______1.42______1.27
earley____________________1.02______1.12______1.00
fannkuch__________________1.00______1.13______1.11
fft_______________________1.00______1.31______1.26
fib_______________________1.00______1.45______1.47
fibc______________________1.00______1.22______1.24
fibfp_____________________1.00______1.44______1.47
fprint____________________1.00______1.37______1.35
fread_____________________1.00______1.24______1.22
gcbench___________________1.00______1.15______1.14
gold______________________1.00______1.22______1.26
gold2_____________________1.00______1.28______1.30
graphs____________________1.00______1.17______1.20
hanoi_____________________1.00______1.39______1.39
integ_____________________1.00______1.00______1.16
integ2____________________1.04______1.00______1.14
kanren____________________1.00______1.13______1.29
kernwyk-ackermann_________1.00______1.00______1.15
kernwyk-array_____________1.00______1.03______1.13
kernwyk-cat_______________1.00______1.12______1.50
kernwyk-string____________1.00______1.05______1.28
kernwyk-sum_______________1.14______1.00______1.30
kernwyk-tail______________1.08______1.00______1.27
kernwyk-wc________________1.00______1.03______1.26
knucleotide_______________1.00______1.02______1.33
lattice___________________1.02______1.00______1.20
matrix____________________1.07______1.00______1.16
maze______________________1.22______1.00______1.38
mazefun___________________1.12______1.00______1.16
mbrot_____________________1.00______1.07______1.30
nbody_____________________1.18______1.00______1.15
nboyer____________________1.11______1.00______1.12
nestedloop________________1.15______1.00______1.16
nfa_______________________1.14______1.00______1.14
nqueens___________________1.07______1.00______1.03
ntakl_____________________1.00______1.06______1.03
nucleic2__________________1.00______1.02______1.02
paraffins_________________1.00______1.00______1.00
parsing___________________1.00______1.00______1.00
pnpoly____________________1.03______1.00______1.02
primes____________________1.02______1.00______1.00
psyntax___________________1.00______1.00______1.01
puzzle____________________1.00______1.05______1.21
ray_______________________1.00______1.01______1.00
ray2______________________1.01______1.00______1.00
sboyer____________________1.03______1.00______1.00
scheme____________________1.32______1.25______1.00
sieves-eratosthenes_______1.00______1.00______1.00
simplex___________________1.00______1.00______1.01
slatex____________________1.06______1.00______1.06
sort1_____________________1.00______1.00______1.01
tak_______________________1.00______1.04______1.03
takl______________________1.10______1.00______1.00
takr______________________1.00______1.00______1.01
traverse__________________1.00______1.00______1.00
travinit__________________1.00______1.03______1.08
triangl___________________1.04______1.00______1.02


Attachment: signature.asc
Description: Digital signature

_______________________________________________
Chicken-hackers mailing list
Chicken-hackers@nongnu.org
https://lists.nongnu.org/mailman/listinfo/chicken-hackers

Reply via email to