Hi! There’s now an x86_64-optimized version of fixnum addition and subtraction, which provides a 17% speedup according to the micro-benchmarks in ‘arithmetic.bm’.
I’ve tried something similar for multiplication, but that yielded only a 5% speedup (which is sort of expected, since the code is very close to what the naive C code does), so I decided not to include it. Here’s the code, for posterity:

# define ASM_MUL(x, y) \
  { \
    asm volatile goto ("mov %1, %%rcx; mov %0, %%rbx; " \
                       "test %[tag], %%cl; je %l[slow_mul]; " \
                       "test %[tag], %%bl; je %l[slow_mul]; " \
                       "sar %[tag_size], %%rcx; " \
                       "sub %[tag], %%rbx; " \
                       "imul %%rbx, %%rcx; jo %l[slow_mul]; " \
                       "add %[tag], %%rcx; " \
                       "mov %%rcx, (%[vsp])\n" \
                       : /* no outputs */ \
                       : "r" (x), "r" (y), \
                         [vsp] "r" (sp), [tag] "i" (scm_tc2_int), \
                         [tag_size] "i" (2) \
                       : "rcx", "rbx", "memory" \
                       : slow_mul); \
    NEXT; \
  } \
  slow_mul: \
    do { } while (0)

Comments welcome!

Thanks,
Ludo.