From: Ramiro Polla <[email protected]>

This will be useful to test more aggressively for failures to mark XMM
registers as clobbered in Win64 builds, and prevent regressions thereof.
---
 configure          |    4 +++
 libavcodec/utils.c |   62 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/configure b/configure
index 49f9af2..d75f664 100755
--- a/configure
+++ b/configure
@@ -116,6 +116,9 @@ Configuration options:
                            disable buffer boundary checking in bitreaders
                            (faster, but may crash)
   --enable-memalign-hack   emulate memalign, interferes with memory debuggers
+  --enable-win64-test-xmm-clobber
+                           check XMM registers for clobbering (Win64-only;
+                           should be used only for debugging purposes)
   --disable-everything     disable all components listed below
   --disable-encoder=NAME   disable encoder NAME
   --enable-encoder=NAME    enable encoder NAME
@@ -991,6 +994,7 @@ CONFIG_LIST="
     vda
     vdpau
     version3
+    win64_test_xmm_clobber
     x11grab
     zlib
 "
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index fa60953..68d664c 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -52,6 +52,52 @@ static int (*ff_lockmgr_cb)(void **mutex, enum AVLockOp op);
 static void *codec_mutex;
 static void *avformat_mutex;
 
+#if defined(_WIN64) && CONFIG_WIN64_TEST_XMM_CLOBBER
+#undef exit /* exit() is called below; presumably a project header redefines it */
+/* Store XMM6-XMM15 (callee-saved in the Win64 ABI) to the 160 bytes at mem.
+ * movups is used so that mem need not be 16-byte aligned. */
+#define storexmmregs(mem)                 \
+    __asm__ volatile(                     \
+        "movups %%xmm6 , 0x00(%0)\n\t"    \
+        "movups %%xmm7 , 0x10(%0)\n\t"    \
+        "movups %%xmm8 , 0x20(%0)\n\t"    \
+        "movups %%xmm9 , 0x30(%0)\n\t"    \
+        "movups %%xmm10, 0x40(%0)\n\t"    \
+        "movups %%xmm11, 0x50(%0)\n\t"    \
+        "movups %%xmm12, 0x60(%0)\n\t"    \
+        "movups %%xmm13, 0x70(%0)\n\t"    \
+        "movups %%xmm14, 0x80(%0)\n\t"    \
+        "movups %%xmm15, 0x90(%0)\n\t"    \
+        :: "r"(mem) : "memory")
+
+/**
+ * Call func(...) and exit(-1) if any byte of XMM6-XMM15 changed across the call.
+ * Logs through a variable named avctx that must be in scope at the call site;
+ * evaluates to the int returned by func.
+ */
+#define testxmmclobbers(func, ...)        \
+({                                        \
+    uint8_t xmm[2][10*16];                \
+    int ret;                              \
+    storexmmregs(xmm[0]);                 \
+    ret = func(__VA_ARGS__);              \
+    storexmmregs(xmm[1]);                 \
+    if (memcmp(xmm[0], xmm[1], 10*16)) {  \
+        int i;                            \
+        av_log(avctx, AV_LOG_ERROR, "XMM REGS CLOBBERED IN %s!\n", __func__); \
+        for (i = 0; i < 10*16; i++)       \
+            if (xmm[0][i] != xmm[1][i])   \
+                av_log(avctx, AV_LOG_ERROR, "xmm[0][%x] = %02x, xmm[1][%x] = %02x\n", i, xmm[0][i], i, xmm[1][i]); \
+        exit(-1);                         \
+    }                                     \
+    ret;                                  \
+})
+#else
+/* Checking disabled: expands to a plain call. */
+#define testxmmclobbers(func, ...)        \
+    func(__VA_ARGS__)
+#endif
+
 void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
 {
     if(min_size < *size)
@@ -793,7 +839,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD
     }
 
     if(avctx->codec->init && !(avctx->active_thread_type&FF_THREAD_FRAME)){
-        ret = avctx->codec->init(avctx);
+        ret = testxmmclobbers(avctx->codec->init, avctx);
         if (ret < 0) {
             goto free_and_end;
         }
@@ -873,7 +919,7 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
 
     if (avctx->codec->encode2) {
         *got_packet_ptr = 0;
-        ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
+        ret = testxmmclobbers(avctx->codec->encode2, avctx, avpkt, frame, got_packet_ptr);
         if (!ret && *got_packet_ptr &&
             !(avctx->codec->capabilities & CODEC_CAP_DELAY)) {
             avpkt->pts = frame->pts;
@@ -916,7 +962,7 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
         }
 
         /* encode the frame */
-        ret = avctx->codec->encode(avctx, avpkt->data, avpkt->size,
+        ret = testxmmclobbers(avctx->codec->encode, avctx, avpkt->data, avpkt->size,
                                    frame ? frame->data[0] : NULL);
         if (ret >= 0) {
             if (!ret) {
@@ -1045,7 +1091,7 @@ int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf
     if(av_image_check_size(avctx->width, avctx->height, 0, avctx))
         return -1;
     if((avctx->codec->capabilities & CODEC_CAP_DELAY) || pict){
-        int ret = avctx->codec->encode(avctx, buf, buf_size, pict);
+        int ret = testxmmclobbers(avctx->codec->encode, avctx, buf, buf_size, pict);
         avctx->frame_number++;
         emms_c(); //needed to avoid an emms_c() call before every return;
 
@@ -1064,7 +1110,7 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
     }
     if(sub->num_rects == 0 || !sub->rects)
         return -1;
-    ret = avctx->codec->encode(avctx, buf, buf_size, sub);
+    ret = testxmmclobbers(avctx->codec->encode, avctx, buf, buf_size, sub);
     avctx->frame_number++;
     return ret;
 }
@@ -1129,7 +1175,7 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
              ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr,
                                           avpkt);
         else {
-            ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
+            ret = testxmmclobbers(avctx->codec->decode, avctx, picture, got_picture_ptr,
                               avpkt);
             picture->pkt_dts= avpkt->dts;
             picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
@@ -1214,7 +1260,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
     apply_param_change(avctx, avpkt);
 
     if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size) {
-        ret = avctx->codec->decode(avctx, frame, got_frame_ptr, avpkt);
+        ret = testxmmclobbers(avctx->codec->decode, avctx, frame, got_frame_ptr, avpkt);
         if (ret >= 0 && *got_frame_ptr) {
             avctx->frame_number++;
             frame->pkt_dts = avpkt->dts;
@@ -1233,7 +1279,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
 
     avctx->pkt = avpkt;
     *got_sub_ptr = 0;
-    ret = avctx->codec->decode(avctx, sub, got_sub_ptr, avpkt);
+    ret = testxmmclobbers(avctx->codec->decode, avctx, sub, got_sub_ptr, avpkt);
     if (*got_sub_ptr)
         avctx->frame_number++;
     return ret;
-- 
1.7.7.4

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to