From: Ramiro Polla <[email protected]>
This will be useful to test more aggressively for failures to mark XMM
registers as clobbered in Win64 builds, and prevent regressions thereof.
---
configure | 4 +++
libavcodec/utils.c | 62 +++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 58 insertions(+), 8 deletions(-)
diff --git a/configure b/configure
index 49f9af2..d75f664 100755
--- a/configure
+++ b/configure
@@ -116,6 +116,9 @@ Configuration options:
disable buffer boundary checking in bitreaders
(faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
+ --enable-win64-test-xmm-clobber
+ check XMM registers for clobbering (Win64-only;
+ should be used only for debugging purposes)
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
--enable-encoder=NAME enable encoder NAME
@@ -991,6 +994,7 @@ CONFIG_LIST="
vda
vdpau
version3
+ win64_test_xmm_clobber
x11grab
zlib
"
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index fa60953..68d664c 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -52,6 +52,52 @@ static int (*ff_lockmgr_cb)(void **mutex, enum AVLockOp op);
static void *codec_mutex;
static void *avformat_mutex;
+#if defined(_WIN64) && CONFIG_WIN64_TEST_XMM_CLOBBER
+#undef exit
+#define testxmmclobbers(func, ...) \
+({ \
+ char xmm[2][10*16]; \
+ int ret; \
+ __asm volatile( \
+ "movaps %%xmm6 , 0x00(%0)\n\t" \
+ "movaps %%xmm7 , 0x10(%0)\n\t" \
+ "movaps %%xmm8 , 0x20(%0)\n\t" \
+ "movaps %%xmm9 , 0x30(%0)\n\t" \
+ "movaps %%xmm10, 0x40(%0)\n\t" \
+ "movaps %%xmm11, 0x50(%0)\n\t" \
+ "movaps %%xmm12, 0x60(%0)\n\t" \
+ "movaps %%xmm13, 0x70(%0)\n\t" \
+ "movaps %%xmm14, 0x80(%0)\n\t" \
+ "movaps %%xmm15, 0x90(%0)\n\t" \
+ ::"r"(xmm[0]): "memory"); \
+ ret = func(__VA_ARGS__); \
+ __asm volatile( \
+ "movaps %%xmm6 , 0x00(%0)\n\t" \
+ "movaps %%xmm7 , 0x10(%0)\n\t" \
+ "movaps %%xmm8 , 0x20(%0)\n\t" \
+ "movaps %%xmm9 , 0x30(%0)\n\t" \
+ "movaps %%xmm10, 0x40(%0)\n\t" \
+ "movaps %%xmm11, 0x50(%0)\n\t" \
+ "movaps %%xmm12, 0x60(%0)\n\t" \
+ "movaps %%xmm13, 0x70(%0)\n\t" \
+ "movaps %%xmm14, 0x80(%0)\n\t" \
+ "movaps %%xmm15, 0x90(%0)\n\t" \
+ ::"r"(xmm[1]): "memory"); \
+ if (memcmp(xmm[0], xmm[1], 10*16)) { \
+ int i; \
+ av_log(avctx, AV_LOG_ERROR, "XMM REGS CLOBBERED IN %s!\n", __func__); \
+ for (i = 0; i < 10*16; i++) \
+ if (xmm[0][i] != xmm[1][i]) \
+ av_log(avctx, AV_LOG_ERROR, "xmm[0][%x] = %02x, xmm[1][%x] =
%02x\n", i, xmm[0][i], i, xmm[1][i]); \
+ exit(-1); \
+ } \
+ ret; \
+})
+#else
+#define testxmmclobbers(func, ...) \
+ func(__VA_ARGS__)
+#endif
+
void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
{
if(min_size < *size)
@@ -793,7 +839,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD
}
if(avctx->codec->init && !(avctx->active_thread_type&FF_THREAD_FRAME)){
- ret = avctx->codec->init(avctx);
+ ret = testxmmclobbers(avctx->codec->init, avctx);
if (ret < 0) {
goto free_and_end;
}
@@ -873,7 +919,7 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
if (avctx->codec->encode2) {
*got_packet_ptr = 0;
- ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
+ ret = testxmmclobbers(avctx->codec->encode2, avctx, avpkt, frame, got_packet_ptr);
if (!ret && *got_packet_ptr &&
!(avctx->codec->capabilities & CODEC_CAP_DELAY)) {
avpkt->pts = frame->pts;
@@ -916,7 +962,7 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
}
/* encode the frame */
- ret = avctx->codec->encode(avctx, avpkt->data, avpkt->size,
+ ret = testxmmclobbers(avctx->codec->encode, avctx, avpkt->data, avpkt->size,
frame ? frame->data[0] : NULL);
if (ret >= 0) {
if (!ret) {
@@ -1045,7 +1091,7 @@ int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf
if(av_image_check_size(avctx->width, avctx->height, 0, avctx))
return -1;
if((avctx->codec->capabilities & CODEC_CAP_DELAY) || pict){
- int ret = avctx->codec->encode(avctx, buf, buf_size, pict);
+ int ret = testxmmclobbers(avctx->codec->encode, avctx, buf, buf_size, pict);
avctx->frame_number++;
emms_c(); //needed to avoid an emms_c() call before every return;
@@ -1064,7 +1110,7 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
}
if(sub->num_rects == 0 || !sub->rects)
return -1;
- ret = avctx->codec->encode(avctx, buf, buf_size, sub);
+ ret = testxmmclobbers(avctx->codec->encode, avctx, buf, buf_size, sub);
avctx->frame_number++;
return ret;
}
@@ -1129,7 +1175,7 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr,
avpkt);
else {
- ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
+ ret = testxmmclobbers(avctx->codec->decode, avctx, picture, got_picture_ptr,
avpkt);
picture->pkt_dts= avpkt->dts;
picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
@@ -1214,7 +1260,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
apply_param_change(avctx, avpkt);
if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size) {
- ret = avctx->codec->decode(avctx, frame, got_frame_ptr, avpkt);
+ ret = testxmmclobbers(avctx->codec->decode, avctx, frame, got_frame_ptr, avpkt);
if (ret >= 0 && *got_frame_ptr) {
avctx->frame_number++;
frame->pkt_dts = avpkt->dts;
@@ -1233,7 +1279,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
avctx->pkt = avpkt;
*got_sub_ptr = 0;
- ret = avctx->codec->decode(avctx, sub, got_sub_ptr, avpkt);
+ ret = testxmmclobbers(avctx->codec->decode, avctx, sub, got_sub_ptr, avpkt);
if (*got_sub_ptr)
avctx->frame_number++;
return ret;
--
1.7.7.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel