I am working with an application that computes a lot of SHA-1 hashes. I am trying to profile it and generate flame graphs (see http://www.brendangregg.com/flamegraphs.html ), but profiling tools cannot backtrace successfully while the processor is running the optimized SHA-1 code on x86_64. This patch adds CFI directives, emitted only when assembling with the GNU assembler, so that tools that understand DWARF debugging information can backtrace through these routines.
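For anyone unfamiliar with how the perlasm scripts emit conditional text: backtick-quoted Perl embedded in the $code here-docs is eval'ed in a post-processing pass before the assembly is printed, which is what the `"..." if $cfi` additions in the patch below rely on. Here is a stand-alone sketch of the idea (my own illustration, not part of the patch; the "demo" label, the @ARGV handling and the exact substitution line are simplifications of what sha1-x86_64.pl actually does):

#!/usr/bin/perl
# Stand-alone sketch (not part of the patch) of the perlasm idiom the CFI
# additions rely on: backtick-quoted Perl inside the $code here-docs is
# eval'ed in a post-processing pass, so each `"..." if $cfi` segment becomes
# a directive only when $cfi was set by the GNU-assembler probe.

$cfi = $ARGV[0];                # stands in for the GNU-assembler probe

$code = <<___;
.type   demo,\@function,3
demo:
`".cfi_startproc" if $cfi`
    push    %rbx
`".cfi_offset rbx,-16" if $cfi`
    ret
`".cfi_endproc" if $cfi`
.size   demo,.-demo
___

# Simplified version of the post-processing the real script applies before
# printing: every backtick-quoted span is replaced by the result of eval'ing it.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;

Running it as "perl cfi-demo.pl 1" prints the .cfi_* directives; running it with no argument leaves only blank lines where they would have been, which is the behaviour I describe below.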
I don't have a build environment for win64, but I did verify that the perl code does not generate the CFI directives when we are not generating code for the GNU assembler (i.e. if $cfi is not set).

-Matt

commit 9522d706fa58679abd0b6f923aad623fad39abe5
Author: Matt Cross <matt.cr...@gmail.com>
Date:   Wed Mar 25 14:15:37 2015 -0400

    Add CFI directives to the x86_64 SHA1 implementation to allow DWARF
    aware utilities to backtrace through these routines.

diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index 9bb6b49..9fe7b2b 100755
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -95,6 +95,7 @@ die "can't locate x86_64-xlate.pl";
 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
         =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
     $avx = ($1>=2.19) + ($1>=2.22);
+    $cfi = 1
 }
 
 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
@@ -247,6 +248,8 @@ $code.=<<___;
 .type   sha1_block_data_order,\@function,3
 .align  16
 sha1_block_data_order:
+`".cfi_startproc" if $cfi`
+
     mov OPENSSL_ia32cap_P+0(%rip),%r9d
     mov OPENSSL_ia32cap_P+4(%rip),%r8d
     mov OPENSSL_ia32cap_P+8(%rip),%r10d
@@ -275,17 +278,35 @@ $code.=<<___;
 .align  16
 .Lialu:
     mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
     push    %rbx
+# The CFA (Canonical Frame Address) is just above the pushed return address, so RBX was just stored at CFA - 16:
+`".cfi_offset rbx,-16" if $cfi`
     push    %rbp
+`".cfi_offset rbp,-24" if $cfi`
     push    %r12
+`".cfi_offset r12,-32" if $cfi`
     push    %r13
+`".cfi_offset r13,-40" if $cfi`
     push    %r14
+`".cfi_offset r14,-48" if $cfi`
     mov %rdi,$ctx           # reassigned argument
     sub \$`8+16*4`,%rsp
     mov %rsi,$inp           # reassigned argument
     and \$-64,%rsp
     mov %rdx,$num           # reassigned argument
     mov %rax,`16*4`(%rsp)
+# This adds a "CFA expression" to say that the CFA is calculated by reading the value at RSP+0x40 and adding 8 to it:
+#   DW_CFA_def_cfa_expression  0x0f            : says CFA is calculated by evaluating the following expression
+#   BLOCK
+#     length (ULEB128)         0x06            : number of bytes remaining
+#     DW_OP_breg7 0x40         0x77 0xc0 0x00  : read RSP, add 0x40, and push onto stack - note the SLEB128
+#                                                encoding of 0x40 requires 2 bytes to avoid sign extension
+#     DW_OP_deref              0x06            : read from the addr on top of the stack
+#     DW_OP_plus_uconst 0x8    0x23 0x08       : pop top of stack, add 8, push back onto stack
+
+`".cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08" if $cfi`
+
 .Lprologue:
 
     mov 0($ctx),$A
@@ -319,14 +340,22 @@ $code.=<<___;
     jnz .Lloop
 
     mov `16*4`(%rsp),%rsi
+`".cfi_def_cfa rsi,8" if $cfi`
     mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
     mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
     mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
     mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
     mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
     lea (%rsi),%rsp
+`".cfi_def_cfa rsp,8" if $cfi`
 .Lepilogue:
     ret
+`".cfi_endproc" if $cfi`
 .size   sha1_block_data_order,.-sha1_block_data_order
 ___
 if ($shaext) {{{
@@ -342,6 +371,7 @@ $code.=<<___;
 .align  32
 sha1_block_data_order_shaext:
 _shaext_shortcut:
+`".cfi_startproc" if $cfi`
 ___
 $code.=<<___ if ($win64);
     lea `-8-4*16`(%rsp),%rsp
@@ -440,6 +470,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
     ret
+`".cfi_endproc" if $cfi`
 .size   sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
 ___
 }}}
@@ -473,12 +504,19 @@ $code.=<<___;
 .align  16
 sha1_block_data_order_ssse3:
 _ssse3_shortcut:
+`".cfi_startproc" if $cfi`
     mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
     push    %rbx
+`".cfi_offset rbx,-16" if $cfi`
     push    %rbp
+`".cfi_offset rbp,-24" if $cfi`
     push    %r12
+`".cfi_offset r12,-32" if $cfi`
     push    %r13            # redundant, done to share Win64 SE handler
+`".cfi_offset r13,-40" if $cfi`
     push    %r14
+`".cfi_offset r14,-48" if $cfi`
     lea `-64-($win64?6*16:0)`(%rsp),%rsp
 ___
 $code.=<<___ if ($win64);
@@ -492,6 +530,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
     mov %rax,%r14           # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
     and \$-64,%rsp
     mov %rdi,$ctx           # reassigned argument
     mov %rsi,$inp           # reassigned argument
@@ -907,14 +946,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
     lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
     mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
     mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
     mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
     mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
     mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
     lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
 .Lepilogue_ssse3:
     ret
+`".cfi_endproc" if $cfi`
 .size   sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
 ___
 
@@ -935,12 +982,19 @@ $code.=<<___;
 .align  16
 sha1_block_data_order_avx:
 _avx_shortcut:
+`".cfi_startproc" if $cfi`
     mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
     push    %rbx
+`".cfi_offset rbx,-16" if $cfi`
     push    %rbp
+`".cfi_offset rbp,-24" if $cfi`
     push    %r12
+`".cfi_offset r12,-32" if $cfi`
     push    %r13            # redundant, done to share Win64 SE handler
+`".cfi_offset r13,-40" if $cfi`
     push    %r14
+`".cfi_offset r14,-48" if $cfi`
     lea `-64-($win64?6*16:0)`(%rsp),%rsp
     vzeroupper
 ___
@@ -955,6 +1009,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
     mov %rax,%r14           # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
     and \$-64,%rsp
     mov %rdi,$ctx           # reassigned argument
     mov %rsi,$inp           # reassigned argument
@@ -1271,14 +1326,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
     lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
     mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
     mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
     mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
     mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
     mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
     lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
 .Lepilogue_avx:
     ret
+`".cfi_endproc" if $cfi`
 .size   sha1_block_data_order_avx,.-sha1_block_data_order_avx
 ___
 
@@ -1302,12 +1365,19 @@ $code.=<<___;
 .align  16
 sha1_block_data_order_avx2:
 _avx2_shortcut:
+`".cfi_startproc" if $cfi`
     mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
     push    %rbx
+`".cfi_offset rbx,-16" if $cfi`
     push    %rbp
+`".cfi_offset rbp,-24" if $cfi`
     push    %r12
+`".cfi_offset r12,-32" if $cfi`
     push    %r13
+`".cfi_offset r13,-40" if $cfi`
     push    %r14
+`".cfi_offset r14,-48" if $cfi`
     vzeroupper
 ___
 $code.=<<___ if ($win64);
@@ -1322,6 +1392,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
     mov %rax,%r14           # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
     mov %rdi,$ctx           # reassigned argument
     mov %rsi,$inp           # reassigned argument
     mov %rdx,$num           # reassigned argument
@@ -1750,14 +1821,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
     lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
     mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
     mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
     mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
     mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
     mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
     lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
 .Lepilogue_avx2:
     ret
+`".cfi_endproc" if $cfi`
 .size   sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
 ___
 }
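As a sanity check on the hand-encoded .cfi_escape byte string explained in the comment in the patch above, here is a small stand-alone decoder (again my own sketch, not part of the patch) that walks the DWARF expression and prints what the unwinder will compute. The opcode values are the standard DWARF constants: DW_CFA_def_cfa_expression = 0x0f, DW_OP_breg0..31 = 0x70..0x8f, DW_OP_deref = 0x06, DW_OP_plus_uconst = 0x23.

#!/usr/bin/perl
# Decode the byte string passed to .cfi_escape above and print the
# call-frame expression it encodes.
use strict;
use warnings;

my @b = (0x0f, 0x06, 0x77, 0xc0, 0x00, 0x06, 0x23, 0x08);

sub uleb128 {                           # unsigned LEB128: (value, bytes consumed)
    my ($bytes, $i) = @_;
    my ($val, $shift, $n) = (0, 0, 0);
    my $byte;
    do {
        $byte = $bytes->[$i + $n++];
        $val |= ($byte & 0x7f) << $shift;
        $shift += 7;
    } while ($byte & 0x80);
    return ($val, $n);
}

sub sleb128 {                           # signed LEB128: (value, bytes consumed)
    my ($bytes, $i) = @_;
    my ($val, $n) = uleb128($bytes, $i);
    my $last = $bytes->[$i + $n - 1];
    $val -= 1 << (7 * $n) if $last & 0x40;   # sign-extend if the sign bit is set
    return ($val, $n);
}

die "not DW_CFA_def_cfa_expression\n" unless $b[0] == 0x0f;
my ($len, $used) = uleb128(\@b, 1);      # length of the expression block
my $i   = 1 + $used;
my $end = $i + $len;
print "DW_CFA_def_cfa_expression, $len expression bytes:\n";
while ($i < $end) {
    my $op = $b[$i++];
    if ($op >= 0x70 && $op <= 0x8f) {            # DW_OP_breg<n> <sleb128 offset>
        my ($off, $k) = sleb128(\@b, $i); $i += $k;
        printf "  DW_OP_breg%d %+d    ; push reg%d + %d (reg7 is %%rsp)\n",
               $op - 0x70, $off, $op - 0x70, $off;
    } elsif ($op == 0x06) {                      # DW_OP_deref
        print  "  DW_OP_deref         ; pop address, push the value stored there\n";
    } elsif ($op == 0x23) {                      # DW_OP_plus_uconst <uleb128>
        my ($add, $k) = uleb128(\@b, $i); $i += $k;
        print  "  DW_OP_plus_uconst $add ; add $add to the top of the stack\n";
    } else {
        printf "  unknown opcode 0x%02x\n", $op;
    }
}

It prints DW_OP_breg7 +64, DW_OP_deref and DW_OP_plus_uconst 8, i.e. CFA = *(%rsp + 0x40) + 8: the original %rsp saved at 0x40(%rsp) points at the return address, and the CFA sits 8 bytes above that, matching the comment in the patch.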