Hi Andy, I haven't witnessed any problems like you describe, so maybe the OS I am using is well behaved. Interestingly, I just looked at the assembly generated by switching to the generic ROTR definition:
#define ROTR(x,s) (((x)>>s) | (x)<<(64-s)) I was surprised to see that the compiler is smart enough to use a rotrdi instruction anyway in this case. In fact the code generated is exactly the same as when using the explicit rotrdi version of the macro. So for this particular compiler (gcc 4.1.1 based), it doesn't seem to matter which version of the macro we pick, so your suggestion is fine. On the other hand, if the other cases you mentioned where the upper half of the registered get wiped exhibit the same compiler behavior, it wouldn't be fixing any problems to switch to the generic ROTR. The output for "gcc -dumpspecs" you requested is attached. -Ben -----Original Message----- From: Andy Polyakov via RT [mailto:r...@openssl.org] Sent: Saturday, September 12, 2009 9:21 AM To: Ben Nason Cc: openssl-dev@openssl.org Subject: [Fwd: Re: [openssl.org #1998] [PATCH] SHA512 ROTR macro fix for PowerPC using LP32 model] Oops. Should have replied to r...@openssl.org. Sorry about duplicate. A. -------- Original Message -------- Subject: Re: [openssl.org #1998] [PATCH] SHA512 ROTR macro fix for PowerPC using LP32 model Date: Sat, 12 Sep 2009 18:57:58 +0200 From: Andy Polyakov <ap...@fy.chalmers.se> Reply-To: openssl-dev@openssl.org To: openssl-dev@openssl.org References: <rt-ticket-1...@openssl.org> <b71dcb610d2a2f41b8d20b868c6ecead05313...@superfly.netflix.com> <rt-3.4.5-14104-1249285995-1620.1998-2...@openssl.org> > I just ran into a bug where SHA384/512 were not being calculated > correctly on the Cell processor. I tracked it down to the definition of > the ROTR macro, which is assuming a 64 bit long, but in this case the > compiler is using the LP32 model so long is 32 bits and the values were > being truncated. Here is the patch I did that fixes the problem: > > --- sha512.c 2009-07-27 15:04:52.546574000 -0700 > +++ sha512.c 2009-07-27 15:08:07.373452100 -0700 > @@ -344,7 +344,7 @@ > ((SHA_LONG64)hi)<<32|lo; }) > # endif > # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || > defined(_ARCH_PPC64) > -# define ROTR(a,n) ({ unsigned long ret; \ > +# define ROTR(a,n) ({ SHA_LONG64 ret; \ > asm ("rotrdi %0,%1,%2" \ > : "=r"(ret) \ > : "r"(a),"K"(n)); ret; }) Trouble with using rotrdi in ILP32 PPC application context is that there are PPC OSes that wipe upper halves of registers upon signal delivery. All PPC OSes do preserve full registers upon context switch, so that the code works fine in e.g. openssl test suite, but it will eventually fail in real-life application, one utilizing asynchronous signaling in some way. The problem would appear sporadic and rare, because signal would have to be delivered in very specific moments. The only way to safely use 64-bit instructions in ILP32 process (on these PPC OSes) is to block signals upon entry to such subroutine and unblock upon return. As for "these PPC OSes." Both Linux and AIX are ill-behaved, but not MacOS X (starting with one supporting 64-bit applications). In other words correct solution for the problem is to make sure the macro is *not* defined in ILP32 context. Meaning that we have to reiterate #elif expression instead of fixing the macro. What's your compiler version? Submit 'gcc -dumpspecs' output (I assume it's gcc). A. ______________________________________________________________________ OpenSSL Project http://www.openssl.org Development Mailing List openssl-dev@openssl.org Automated List Manager majord...@openssl.org
*asm: %{!mlp64:-mcelloslv2} -a64 -mbig -mcellppu %{.s: %{mregnames} %{mno-regnames}} %{.S: %{mregnames} %{mno-regnames}} %{v:-V} %{Qy:} %{!Qn:-Qy} %{Wa,*:%*} *asm_debug: %{gstabs*:--gstabs}%{!gstabs*:%{g*:--gdwarf2}} *asm_final: *asm_options: %a %Y %{c:%W{o*}%{!o*:-o %w%b%O}}%{!c:-o %{save-temps-o:%{o*:%p}}%d%w%u%O} *asm_only: *invoke_as: %{!S:-o %{save-temps-o:%{o*:%p}}%|.s | as %(asm_options) %{save-temps-o:%{o*:%p}}%|.s %A } *cpp: -D__PPU__ -D__CELLOS_LV2__ %{fno-rtti:-D__NO_RTTI} %{fno-exceptions:-D_NO_EX} %{mlp64:-funsigned-char} *cpp_options: %(cpp_unique_options) %1 %{m*} %{std*&ansi&trigraphs} %{W*&pedantic*} %{w} %{f*} %{g*:%{!g0:%{!fno-working-directory:-fworking-directory}}} %{O*} %{undef} %{save-temps|save-temps-o:-fpch-preprocess} *cpp_debug_options: %{d*} *cpp_unique_options: %{C|CC:%{!E:%eGCC does not support -C or -CC without -E}} %{!Q:-quiet} %{nostdinc*} %{C} %{CC} %{v} %{I*&F*} %{P} %I %{MD:-MD %{!o:%b.d}%{o*:%.d%*}} %{MMD:-MMD %{!o:%b.d}%{o*:%.d%*}} %{M} %{MM} %{MF*} %{MG} %{MP} %{MQ*} %{MT*} %{MV} %{MC} %{!E:%{!M:%{!MM:%{MD|MMD:%{o*:-MQ %*}}}}} %{remap} %{g3:-dD} %{H} %C %{D*&U*&A*} %{i*} %Z %i %{fmudflap:-D_MUDFLAP -include mf-runtime.h} %{fmudflapth:-D_MUDFLAP -D_MUDFLAPTH -include mf-runtime.h} %{E|M|MM:%W{o*}} *trad_capable_cpp: cc1 -E %{traditional|ftraditional|traditional-cpp:-traditional-cpp} *cc1: %{!mno-altivec:%{!mabi=*:-mabi=altivec}} %{!mno-altivec:-maltivec} %{!mno-strict-align:-mstrict-align} %{!fno-strict-aligned:-fstrict-aligned} %{mlp64:-funsigned-char} %{!mvrsave:-mno-vrsave} %{!mtraceback=*:-mtraceback=none} %{mcellos-kernel:-mno-nop-after-bl} %{mcellos-kernel:-mno-save-restore-tocbase} %{mcellos-kernel:-falign-functions=1} %{mcellos-kernel:-falign-jumps=1} %{mcellos-kernel:-falign-labels=1} %{mcellos-kernel:-falign-loops=1} *cc1_options: %{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}} %1 %{!Q:-quiet} -dumpbase %B %{d*} %{m*} %{a*} %{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}%{!c:%{!S:-auxbase %b}} %{g*} %{O*} %{W*&pedantic*} %{w} %{std*&ansi&trigraphs} %{v:-version} %{pg:-p} %{p} %{save-temps*:%<fpch-deps} %{f*} %{undef} %{Qn:-fno-ident} %{--help:--help} %{--target-help:--target-help} %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}} %{fsyntax-only:-o %j} %{-param*} %{fmudflap|fmudflapth:-fno-builtin -fno-merge-constants} %{coverage:-fprofile-arcs -ftest-coverage} *cc1plus: %{!fthreadsafe-statics:-fno-threadsafe-statics} *cc1_only: %{!testing:%{!std=*:-std=gnu99}} *link_gcc_c_sequence: %L *link_ssp: %{fstack-protector|fstack-protector-all:-lssp_nonshared -lssp} *endfile: %{shared: crtendS.o%s; :crtend.o%s} ecrtn.o%s *link: %{!static:--eh-frame-hdr} %{h*} %{v:-V} %{!msdata=none:%{G*}} %{msdata=none:-G0} %{YP,*} %{R*} %{Qy:} %{!Qn:-Qy} %{mlp64:-melf64ppc} %{mno-sn-ld: %{mno-toc: %e-mno-toc can not be used with -mno-sn-ld}; :%{!mlp64:--alternative-ld=ps3ppuld --gnu-mode %{fno-exceptions: --no-exceptions} %{mno-toc: --no-toc-restore} %{!mno-prxfixup: %{!mforce-prx-fixup: %{!r: --prx-fixup}}} %{mprx|mprx-with-runtime:--no-check-unresolved} %{mppuguid: %{!r:-ppuguid}} %{r|mno-ppuguid:-no-ppuguid} } } %{shared} *lib: %{!mno-sn-ld: -L%R/lib} --start-group %{mfast-libc: -lcs; :-lc} -lgcc -lstdc++ -lsupc++ %:if-file-exist(%:prepend-cmddir(../../../target/ppu/lib/libsnc.a) -lsnc) %{mlv2-stub|!mno-prxfixup:-llv2_stub; :-llv2} -lsyscall --end-group %{r|T: ; : %{!mno-prxfixup: -T %R/lib/elf64_lv2_prx.x ; : %:use-script-file-if-exist(%:prepend-cmddir(../../../non_release_image/target/ppu/lib/elf64_lv2_no_prx.x))}} *mfwrap: %{static: %{fmudflap|fmudflapth: --wrap=malloc --wrap=free --wrap=calloc --wrap=realloc --wrap=mmap --wrap=munmap --wrap=alloca} %{fmudflapth: --wrap=pthread_create}} %{fmudflap|fmudflapth: --wrap=main} *mflib: %{fmudflap|fmudflapth: -export-dynamic} *libgcc: -lgcc *startfile: ecrti.o%s %{!shared:%{!mmambo:crt0.o%s crt1.o%s;: mmambo:--whole-archive mambo-crt1.o%s libmambo.a%s --no-whole-archive}} %{shared: crtbeginS.o%s; :crtbegin.o%s} %{mppuguid: %{!r: %{mno-sn-ld: %R/lib/crtid.o;:}}} *switches_need_spaces: *cross_compile: 1 *version: 4.1.1 *multilib: . !fno-exceptions !fno-rtti;fno-exceptions fno-exceptions !fno-rtti;fno-exceptions/fno-rtti fno-exceptions fno-rtti; *multilib_defaults: *multilib_extra: *multilib_matches: mcpu=401 msoft-float;mcpu=403 msoft-float;mcpu=405 msoft-float;mcpu=440 msoft-float;mcpu=ec603e msoft-float;mcpu=801 msoft-float;mcpu=821 msoft-float;mcpu=823 msoft-float;mcpu=860 msoft-float;fno-exceptions fno-exceptions;fno-rtti fno-rtti; *multilib_exclusions: *multilib_options: fno-exceptions fno-rtti *linker: collect2 *link_libgcc: %D *md_exec_prefix: *md_startfile_prefix: *md_startfile_prefix_1: *startfile_prefix_spec: /lib/ /lib/sys/ /lib/drivers/ *sysroot_spec: %{mno-sn-ld|mprx|mprx-with-runtime|mlp64: --sysroot=%R; : } *sysroot_suffix_spec: *sysroot_hdrs_suffix_spec: *cpp_default: *asm_cpu: %{!mcpu*: %{mpower: %{!mpower2: -mpwr}} %{mpower2: -mpwrx} %{mpowerpc64*: -mppc64} %{!mpowerpc64*: %{mpowerpc*: -mppc}} %{mno-power: %{!mpowerpc*: -mcom}} %{!mno-power: %{!mpower*: %(asm_default)}}} %{mcpu=common: -mcom} %{mcpu=power: -mpwr} %{mcpu=power2: -mpwrx} %{mcpu=power3: -mppc64} %{mcpu=power4: -mpower4} %{mcpu=power5: -mpower4} %{mcpu=power5+: -mpower4} %{mcpu=powerpc: -mppc} %{mcpu=rios: -mpwr} %{mcpu=rios1: -mpwr} %{mcpu=rios2: -mpwrx} %{mcpu=rsc: -mpwr} %{mcpu=rsc1: -mpwr} %{mcpu=rs64a: -mppc64} %{mcpu=401: -mppc} %{mcpu=403: -m403} %{mcpu=405: -m405} %{mcpu=405fp: -m405} %{mcpu=440: -m440} %{mcpu=440fp: -m440} %{mcpu=505: -mppc} %{mcpu=601: -m601} %{mcpu=602: -mppc} %{mcpu=603: -mppc} %{mcpu=603e: -mppc} %{mcpu=ec603e: -mppc} %{mcpu=604: -mppc} %{mcpu=604e: -mppc} %{mcpu=620: -mppc64} %{mcpu=630: -mppc64} %{mcpu=740: -mppc} %{mcpu=750: -mppc} %{mcpu=G3: -mppc} %{mcpu=7400: -mppc -maltivec} %{mcpu=7450: -mppc -maltivec} %{mcpu=G4: -mppc -maltivec} %{mcpu=801: -mppc} %{mcpu=821: -mppc} %{mcpu=823: -mppc} %{mcpu=860: -mppc} %{mcpu=970: -mpower4 -maltivec} %{mcpu=G5: -mpower4 -maltivec} %{mcpu=8540: -me500} %{mcpu=cell: -mcellppu} %{mcpu=cellppu: -mcellppu} %{maltivec: -maltivec} -many *asm_default: *prx_fixup: ppu-lv2-prx-fixup *prx_link_command: *lv2_old_link_command: %{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S: %(linker) %l %{pie:-pie} %X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r} %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}} %{static:} %{L*} %(mfwrap) %(link_libgcc) %o %(mflib) %{fprofile-arcs|fprofile-generate|coverage:-lgcov} %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}} %{!A:%{!nostdlib:%{!nostartfiles:%E}}} %{T*} %{r|mno-prxfixup: ; : %{mforce-prx-fixup|mno-sn-ld:%(prx_fixup) %{--target-help: --target-help;: --stub-fix-only %{!o: a.out} %{o*: %*}}} }}}}}}} *link_command: %{mprx|mprx-with-runtime: %(prx_link_command); : %(lv2_old_link_command)}