Hi,

2015-02-06 11:07 GMT+01:00 Hendrik Leppkes <h.lepp...@gmail.com>:
> I looked into the MSVC 64bit failure from this patch, and from what I
> can tell doing this doesn't work:
> movsx      a_strideq, byte [pb_eo+eoq*4+1]
>
> I'm not entirely sure on the specifics why it breaks however..
> But all I could find suggests that you should load the table address
> into a reg first, and then use that reg for the address computation.
>
> For some reason, the non-WIN64 version does just this, but the WIN64
> version does not.
> Any particular reason for this difference?

So I infer from those last two sentences that it is a link failure.
Indeed, I did observe that mingw64 (or whatever you call it) is fine
with RIP-relative addressing not being respected here, whereas MSVC is not.

So yes, an intermediate register is needed, and we can't avoid that extra indirection.

Could you test the attached patch? Beware: it was made on a tree that
already contains the patch in question, and I'm not completely sure it
will apply cleanly on yours.

-- 
Christophe
From f0997ceac461add7dddbb1c0a75797bf462bf16e Mon Sep 17 00:00:00 2001
From: Christophe Gisquet <christophe.gisq...@gmail.com>
Date: Fri, 6 Feb 2015 13:43:45 +0100
Subject: [PATCH] x86: hevc_sao: fix loading of RIP address

pb_eo must be addressed RIP-relatively for MSVC64, and a RIP-relative
address cannot be combined with an index register, so the table address
is first loaded into an intermediate register. Should fix link failures.
---
 libavcodec/x86/hevc_sao.asm | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 5136121..8619716 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -296,14 +296,16 @@ HEVC_SAO_BAND_FILTER_16 12, 64, 2
 %if WIN64
 cglobal hevc_sao_edge_filter_%1_8, 4, 8, 8, dst, src, dststride, offset, a_stride, b_stride, height, tmp
 %define  eoq heightq
-    movsxd           eoq, dword r4m
-    movsx      a_strideq, byte [pb_eo+eoq*4+1]
-    movsx      b_strideq, byte [pb_eo+eoq*4+3]
+    movsxd     b_strideq, dword r4m
+    lea             tmpq, [pb_eo]
+    lea              eoq, [tmpq+4*b_strideq]
+    movsx      a_strideq, byte [eoq+1]
+    movsx      b_strideq, byte [eoq+3]
     imul       a_strideq, EDGE_SRCSTRIDE
     imul       b_strideq, EDGE_SRCSTRIDE
-    movsx           tmpq, byte [pb_eo+eoq*4]
+    movsx           tmpq, byte [eoq]
     add        a_strideq, tmpq
-    movsx           tmpq, byte [pb_eo+eoq*4+2]
+    movsx           tmpq, byte [eoq+2]
     add        b_strideq, tmpq
     mov          heightd, r6m
 
@@ -442,14 +444,16 @@ INIT_YMM cpuname
 %if WIN64
 cglobal hevc_sao_edge_filter_%2_%1, 4, 8, 16, dst, src, dststride, offset, a_stride, b_stride, height, tmp
 %define  eoq heightq
-    movsxd           eoq, dword r4m
-    movsx      a_strideq, byte [pb_eo+eoq*4+1]
-    movsx      b_strideq, byte [pb_eo+eoq*4+3]
+    movsxd     b_strideq, dword r4m
+    lea             tmpq, [pb_eo]
+    lea              eoq, [tmpq+4*b_strideq]
+    movsx      a_strideq, byte [eoq+1]
+    movsx      b_strideq, byte [eoq+3]
     imul       a_strideq, EDGE_SRCSTRIDE>>1
     imul       b_strideq, EDGE_SRCSTRIDE>>1
-    movsx           tmpq, byte [pb_eo+eoq*4]
+    movsx           tmpq, byte [eoq]
     add        a_strideq, tmpq
-    movsx           tmpq, byte [pb_eo+eoq*4+2]
+    movsx           tmpq, byte [eoq+2]
     add        b_strideq, tmpq
     mov          heightd, r6m
     add        a_strideq, a_strideq
-- 
1.9.5.msysgit.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to