Process data in chunks of four or eight bytes, depending on host, instead of one at a time.
before: 55561 decicycles in av_aes_ctr_crypt after: 52204 decicycles in av_aes_ctr_crypt Signed-off-by: James Almer <jamr...@gmail.com> --- libavutil/aes_ctr.c | 47 ++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/libavutil/aes_ctr.c b/libavutil/aes_ctr.c index c2d6d570e7..d720aa6aaf 100644 --- a/libavutil/aes_ctr.c +++ b/libavutil/aes_ctr.c @@ -24,6 +24,7 @@ #include "aes_ctr.h" #include "aes.h" #include "aes_internal.h" +#include "intreadwrite.h" #include "macros.h" #include "mem.h" #include "random_seed.h" @@ -32,8 +33,7 @@ typedef struct AVAESCTR { uint8_t counter[AES_BLOCK_SIZE]; - uint8_t encrypted_counter[AES_BLOCK_SIZE]; - int block_offset; + DECLARE_ALIGNED(8, uint8_t, encrypted_counter)[AES_BLOCK_SIZE]; AVAES aes; } AVAESCTR; @@ -46,13 +46,11 @@ void av_aes_ctr_set_iv(struct AVAESCTR *a, const uint8_t* iv) { memcpy(a->counter, iv, AES_CTR_IV_SIZE); memset(a->counter + AES_CTR_IV_SIZE, 0, sizeof(a->counter) - AES_CTR_IV_SIZE); - a->block_offset = 0; } void av_aes_ctr_set_full_iv(struct AVAESCTR *a, const uint8_t* iv) { memcpy(a->counter, iv, sizeof(a->counter)); - a->block_offset = 0; } const uint8_t* av_aes_ctr_get_iv(struct AVAESCTR *a) @@ -75,7 +73,6 @@ int av_aes_ctr_init(struct AVAESCTR *a, const uint8_t *key) av_aes_init(&a->aes, key, 128, 0); memset(a->counter, 0, sizeof(a->counter)); - a->block_offset = 0; return 0; } @@ -101,31 +98,29 @@ void av_aes_ctr_increment_iv(struct AVAESCTR *a) { av_aes_ctr_increment_be64(a->counter); memset(a->counter + AES_CTR_IV_SIZE, 0, sizeof(a->counter) - AES_CTR_IV_SIZE); - a->block_offset = 0; } void av_aes_ctr_crypt(struct AVAESCTR *a, uint8_t *dst, const uint8_t *src, int count) { - const uint8_t* src_end = src + count; - const uint8_t* cur_end_pos; - uint8_t* encrypted_counter_pos; - - while (src < src_end) { - if (a->block_offset == 0) { - av_aes_crypt(&a->aes, a->encrypted_counter, a->counter, 1, NULL, 0); - - av_aes_ctr_increment_be64(a->counter + 8); - } - - encrypted_counter_pos = a->encrypted_counter + a->block_offset; - cur_end_pos = src + AES_BLOCK_SIZE - a->block_offset; - cur_end_pos = FFMIN(cur_end_pos, src_end); - - a->block_offset += cur_end_pos - src; - a->block_offset &= (AES_BLOCK_SIZE - 1); + while (count >= AES_BLOCK_SIZE) { + av_aes_crypt(&a->aes, a->encrypted_counter, a->counter, 1, NULL, 0); + av_aes_ctr_increment_be64(a->counter + 8); +#if HAVE_FAST_64BIT + for (int len = 0; len < AES_BLOCK_SIZE; len += 8) + AV_WN64(&dst[len], AV_RN64(&src[len]) ^ AV_RN64A(&a->encrypted_counter[len])); +#else + for (int len = 0; len < AES_BLOCK_SIZE; len += 4) + AV_WN32(&dst[len], AV_RN32(&src[len]) ^ AV_RN32A(&a->encrypted_counter[len])); +#endif + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + count -= AES_BLOCK_SIZE; + } - while (src < cur_end_pos) { - *dst++ = *src++ ^ *encrypted_counter_pos++; - } + if (count > 0) { + av_aes_crypt(&a->aes, a->encrypted_counter, a->counter, 1, NULL, 0); + av_aes_ctr_increment_be64(a->counter + 8); + for (int len = 0; len < count; len++) + dst[len] = src[len] ^ a->encrypted_counter[len]; } } -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".