The branch, master has been updated
       via  9dd99521 A few style tweaks.
       via  71c4ae23 Move OpenSSL-related MD4/5 defines and imports to lib/mdigest.h
      from  c0268d92 Some improvements for --msgs2stderr and --daemon.
https://git.samba.org/?p=rsync.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit 9dd99521381e022d81528696ca2457e17727200b Author: Wayne Davison <wa...@opencoder.net> Date: Mon Jun 1 18:36:22 2020 -0700 A few style tweaks. commit 71c4ae23362bcd5a2f71127f13008f3b1b7f171d Author: Jorrit Jongma <g...@jongma.org> Date: Mon May 25 18:16:19 2020 +0200 Move OpenSSL-related MD4/5 defines and imports to lib/mdigest.h Works just as well, prevents having to repeat them across files ----------------------------------------------------------------------- Summary of changes: checksum.c | 11 -------- lib/mdigest.h | 10 ++++++++ simd-checksum-x86_64.cpp | 67 +++++++++++++++++++++++++++++------------------- 3 files changed, 51 insertions(+), 37 deletions(-) Changeset truncated at 500 lines: diff --git a/checksum.c b/checksum.c index 19df32a3..d6a91e7c 100644 --- a/checksum.c +++ b/checksum.c @@ -30,10 +30,6 @@ #ifdef SUPPORT_XXHASH #include "xxhash.h" #endif -#ifdef USE_OPENSSL -#include "openssl/md4.h" -#include "openssl/md5.h" -#endif extern int am_server; extern int whole_file; @@ -63,13 +59,6 @@ struct name_num_obj valid_checksums = { } }; -#ifndef USE_OPENSSL -#define MD5_CTX md_context -#define MD5_Init md5_begin -#define MD5_Update md5_update -#define MD5_Final(digest, cptr) md5_result(cptr, digest) -#endif - int xfersum_type = 0; /* used for the file transfer checksums */ int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */ diff --git a/lib/mdigest.h b/lib/mdigest.h index 86c1140f..e543d6f3 100644 --- a/lib/mdigest.h +++ b/lib/mdigest.h @@ -1,5 +1,10 @@ /* The include file for both the MD4 and MD5 routines. 
*/ +#ifdef USE_OPENSSL +#include "openssl/md4.h" +#include "openssl/md5.h" +#endif + #define MD4_DIGEST_LEN 16 #define MD5_DIGEST_LEN 16 #define MAX_DIGEST_LEN MD5_DIGEST_LEN @@ -18,6 +23,11 @@ void mdfour_update(md_context *md, const uchar *in, uint32 length); void mdfour_result(md_context *md, uchar digest[MD4_DIGEST_LEN]); #ifndef USE_OPENSSL +#define MD5_CTX md_context +#define MD5_Init md5_begin +#define MD5_Update md5_update +#define MD5_Final(digest, cptr) md5_result(cptr, digest) + void md5_begin(md_context *ctx); void md5_update(md_context *ctx, const uchar *input, uint32 length); void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]); diff --git a/simd-checksum-x86_64.cpp b/simd-checksum-x86_64.cpp index 66f72650..11e82ab2 100644 --- a/simd-checksum-x86_64.cpp +++ b/simd-checksum-x86_64.cpp @@ -61,53 +61,61 @@ /* Compatibility functions to let our SSSE3 algorithm run on SSE2 */ -__attribute__ ((target("sse2"))) static inline __m128i sse_interleave_odd_epi16(__m128i a, __m128i b) { +__attribute__ ((target("sse2"))) static inline __m128i sse_interleave_odd_epi16(__m128i a, __m128i b) +{ return _mm_packs_epi32( _mm_srai_epi32(a, 16), _mm_srai_epi32(b, 16) ); } -__attribute__ ((target("sse2"))) static inline __m128i sse_interleave_even_epi16(__m128i a, __m128i b) { +__attribute__ ((target("sse2"))) static inline __m128i sse_interleave_even_epi16(__m128i a, __m128i b) +{ return sse_interleave_odd_epi16( _mm_slli_si128(a, 2), _mm_slli_si128(b, 2) ); } -__attribute__ ((target("sse2"))) static inline __m128i sse_mulu_odd_epi8(__m128i a, __m128i b) { +__attribute__ ((target("sse2"))) static inline __m128i sse_mulu_odd_epi8(__m128i a, __m128i b) +{ return _mm_mullo_epi16( _mm_srli_epi16(a, 8), _mm_srai_epi16(b, 8) ); } -__attribute__ ((target("sse2"))) static inline __m128i sse_mulu_even_epi8(__m128i a, __m128i b) { +__attribute__ ((target("sse2"))) static inline __m128i sse_mulu_even_epi8(__m128i a, __m128i b) +{ return _mm_mullo_epi16( _mm_and_si128(a, 
_mm_set1_epi16(0xFF)), _mm_srai_epi16(_mm_slli_si128(b, 1), 8) ); } -__attribute__ ((target("sse2"))) static inline __m128i sse_hadds_epi16(__m128i a, __m128i b) { +__attribute__ ((target("sse2"))) static inline __m128i sse_hadds_epi16(__m128i a, __m128i b) +{ return _mm_adds_epi16( sse_interleave_even_epi16(a, b), sse_interleave_odd_epi16(a, b) ); } -__attribute__ ((target("ssse3"))) static inline __m128i sse_hadds_epi16(__m128i a, __m128i b) { +__attribute__ ((target("ssse3"))) static inline __m128i sse_hadds_epi16(__m128i a, __m128i b) +{ return _mm_hadds_epi16(a, b); } -__attribute__ ((target("sse2"))) static inline __m128i sse_maddubs_epi16(__m128i a, __m128i b) { +__attribute__ ((target("sse2"))) static inline __m128i sse_maddubs_epi16(__m128i a, __m128i b) +{ return _mm_adds_epi16( sse_mulu_even_epi8(a, b), sse_mulu_odd_epi8(a, b) ); } -__attribute__ ((target("ssse3"))) static inline __m128i sse_maddubs_epi16(__m128i a, __m128i b) { +__attribute__ ((target("ssse3"))) static inline __m128i sse_maddubs_epi16(__m128i a, __m128i b) +{ return _mm_maddubs_epi16(a, b); } @@ -143,7 +151,8 @@ __attribute__ ((target("default"))) static inline __m128i sse_maddubs_epi16(__m1 performance, possibly due to not unrolling+inlining the called targeted functions. 
*/ -__attribute__ ((target("sse2", "ssse3"))) static int32 get_checksum1_sse2_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { +__attribute__ ((target("sse2", "ssse3"))) static int32 get_checksum1_sse2_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) +{ if (len > 32) { int aligned = ((uintptr_t)buf & 15) == 0; @@ -261,7 +270,8 @@ __attribute__ ((target("sse2", "ssse3"))) static int32 get_checksum1_sse2_32(sch s1 += (uint32)(t1[0] + t1[1] + t1[2] + t1[3] + t1[4] + t1[5] + t1[6] + t1[7] + t1[8] + t1[9] + t1[10] + t1[11] + t1[12] + t1[13] + t1[14] + t1[15]) + 64*CHAR_OFFSET; */ -__attribute__ ((target("avx2"))) static int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { +__attribute__ ((target("avx2"))) static int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) +{ if (len > 64) { // Instructions reshuffled compared to SSE2 for slightly better performance int aligned = ((uintptr_t)buf & 31) == 0; @@ -367,32 +377,36 @@ __attribute__ ((target("avx2"))) static int32 get_checksum1_avx2_64(schar* buf, return i; } -__attribute__ ((target("default"))) static int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { +__attribute__ ((target("default"))) static int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) +{ return i; } -__attribute__ ((target("default"))) static int32 get_checksum1_sse2_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { +__attribute__ ((target("default"))) static int32 get_checksum1_sse2_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) +{ return i; } -static inline int32 get_checksum1_default_1(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { - uint32 s1 = *ps1; - uint32 s2 = *ps2; - for (; i < (len-4); i+=4) { - s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET; - s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET); - } - 
for (; i < len; i++) { - s1 += (buf[i]+CHAR_OFFSET); s2 += s1; - } - *ps1 = s1; - *ps2 = s2; +static inline int32 get_checksum1_default_1(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) +{ + uint32 s1 = *ps1; + uint32 s2 = *ps2; + for (; i < (len-4); i+=4) { + s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET; + s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET); + } + for (; i < len; i++) { + s1 += (buf[i]+CHAR_OFFSET); s2 += s1; + } + *ps1 = s1; + *ps2 = s2; return i; } extern "C" { -uint32 get_checksum1(char *buf1, int32 len) { +uint32 get_checksum1(char *buf1, int32 len) +{ int32 i = 0; uint32 s1 = 0; uint32 s2 = 0; @@ -409,7 +423,8 @@ uint32 get_checksum1(char *buf1, int32 len) { return (s1 & 0xffff) + (s2 << 16); } -} +} // "C" + #endif /* HAVE_SIMD */ #endif /* __cplusplus */ #endif /* __x86_64__ */ -- The rsync repository. _______________________________________________ rsync-cvs mailing list rsync-cvs@lists.samba.org https://lists.samba.org/mailman/listinfo/rsync-cvs