Declaring stride arguments as ptrdiff_t means SIMD-optimized functions no longer have to sign-extend them manually before using them in pointer arithmetic. --- libavcodec/arm/rv34dsp_neon.S | 4 +-- libavcodec/arm/rv40dsp_neon.S | 4 +-- libavcodec/rv30.c | 4 +-- libavcodec/rv30dsp.c | 64 +++++++++++++++++++++++++++++++++++-------- libavcodec/rv34.c | 10 ++++--- libavcodec/rv34.h | 2 +- libavcodec/rv40.c | 2 +- libavcodec/rv40dsp.c | 18 +++++++----- libavcodec/x86/rv34dsp.asm | 4 +-- libavcodec/x86/rv40dsp.asm | 11 +++----- libavcodec/x86/rv40dsp_init.c | 4 +-- 11 files changed, 85 insertions(+), 42 deletions(-)
diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S index a29123f..06747f4 100644 --- a/libavcodec/arm/rv34dsp_neon.S +++ b/libavcodec/arm/rv34dsp_neon.S @@ -67,7 +67,7 @@ vsub.s32 q15, q14, q9 @ z0 - z3 .endm -/* void rv34_idct_add_c(uint8_t *dst, int stride, int16_t *block) */ +/* void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, int16_t *block) */ function ff_rv34_idct_add_neon, export=1 mov r3, r0 rv34_inv_transform r2 @@ -119,7 +119,7 @@ function ff_rv34_inv_transform_noround_neon, export=1 bx lr endfunc -/* void ff_rv34_idct_dc_add_neon(uint8_t *dst, int stride, int dc) */ +/* void ff_rv34_idct_dc_add_neon(uint8_t *dst, ptrdiff_t stride, int dc) */ function ff_rv34_idct_dc_add_neon, export=1 mov r3, r0 vld1.32 {d28[]}, [r0,:32], r1 diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S index 6bd45eb..1facfb8 100644 --- a/libavcodec/arm/rv40dsp_neon.S +++ b/libavcodec/arm/rv40dsp_neon.S @@ -687,7 +687,7 @@ endfunc .endm /* void ff_rv40_weight_func_16_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2, - int w1, int w2, int stride) */ + int w1, int w2, ptrdiff_t stride) */ function ff_rv40_weight_func_16_neon, export=1 ldr r12, [sp] vmov d0, r3, r12 @@ -704,7 +704,7 @@ function ff_rv40_weight_func_16_neon, export=1 endfunc /* void ff_rv40_weight_func_8_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2, - int w1, int w2, int stride) */ + int w1, int w2, ptrdiff_t stride) */ function ff_rv40_weight_func_8_neon, export=1 ldr r12, [sp] vmov d0, r3, r12 diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c index 7218fa3..d549eb7 100644 --- a/libavcodec/rv30.c +++ b/libavcodec/rv30.c @@ -125,8 +125,8 @@ static int rv30_decode_mb_info(RV34DecContext *r) return rv30_b_types[code]; } -static inline void rv30_weak_loop_filter(uint8_t *src, const int step, - const int stride, const int lim) +static inline void rv30_weak_loop_filter(uint8_t *src, const ptrdiff_t step, + const ptrdiff_t stride, const int lim) { const uint8_t *cm = 
ff_crop_tab + MAX_NEG_CROP; int i, diff; diff --git a/libavcodec/rv30dsp.c b/libavcodec/rv30dsp.c index 50f4186..d76b374 100644 --- a/libavcodec/rv30dsp.c +++ b/libavcodec/rv30dsp.c @@ -31,7 +31,11 @@ #include "rv34dsp.h" #define RV30_LOWPASS(OPNAME, OP) \ -static void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\ +static void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride,\ + const int C1, const int C2)\ +{\ const int h = 8;\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ int i;\ @@ -50,7 +54,11 @@ static void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int }\ }\ \ -static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\ +static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride,\ + const int C1, const int C2)\ +{\ const int w = 8;\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ int i;\ @@ -80,7 +88,10 @@ static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int }\ }\ \ -static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ const int w = 8;\ const int h = 8;\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ @@ -99,7 +110,10 @@ static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, const uint8_t *src, in }\ }\ \ -static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ const int w = 8;\ const int h = 8;\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ @@ -118,7 
+132,10 @@ static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, const uint8_t *src, i }\ }\ \ -static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ const int w = 8;\ const int h = 8;\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ @@ -137,7 +154,10 @@ static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, const uint8_t *src, i }\ }\ \ -static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ const int w = 8;\ const int h = 8;\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ @@ -155,7 +175,11 @@ static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, const uint8_t *src, }\ }\ \ -static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\ +static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride,\ + const int C1, const int C2)\ +{\ OPNAME ## rv30_tpel8_v_lowpass(dst , src , dstStride, srcStride, C1, C2);\ OPNAME ## rv30_tpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\ src += 8*srcStride;\ @@ -164,7 +188,11 @@ static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, const uint8_t *src, in OPNAME ## rv30_tpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\ }\ \ -static void OPNAME ## rv30_tpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\ +static void OPNAME ## rv30_tpel16_h_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride,\ + const int C1, const int C2)\ +{\ OPNAME ## rv30_tpel8_h_lowpass(dst , src , dstStride, 
srcStride, C1, C2);\ OPNAME ## rv30_tpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\ src += 8*srcStride;\ @@ -173,7 +201,10 @@ static void OPNAME ## rv30_tpel16_h_lowpass(uint8_t *dst, const uint8_t *src, in OPNAME ## rv30_tpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\ }\ \ -static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ OPNAME ## rv30_tpel8_hv_lowpass(dst , src , dstStride, srcStride);\ OPNAME ## rv30_tpel8_hv_lowpass(dst+8, src+8, dstStride, srcStride);\ src += 8*srcStride;\ @@ -182,7 +213,10 @@ static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, const uint8_t *src, i OPNAME ## rv30_tpel8_hv_lowpass(dst+8, src+8, dstStride, srcStride);\ }\ \ -static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ OPNAME ## rv30_tpel8_hhv_lowpass(dst , src , dstStride, srcStride);\ OPNAME ## rv30_tpel8_hhv_lowpass(dst+8, src+8, dstStride, srcStride);\ src += 8*srcStride;\ @@ -191,7 +225,10 @@ static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, const uint8_t *src, OPNAME ## rv30_tpel8_hhv_lowpass(dst+8, src+8, dstStride, srcStride);\ }\ \ -static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ OPNAME ## rv30_tpel8_hvv_lowpass(dst , src , dstStride, srcStride);\ OPNAME ## rv30_tpel8_hvv_lowpass(dst+8, src+8, dstStride, srcStride);\ src += 8*srcStride;\ @@ -200,7 +237,10 @@ static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, const uint8_t *src, OPNAME ## 
rv30_tpel8_hvv_lowpass(dst+8, src+8, dstStride, srcStride);\ }\ \ -static void OPNAME ## rv30_tpel16_hhvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static void OPNAME ## rv30_tpel16_hhvv_lowpass(uint8_t *dst, const uint8_t *src,\ + ptrdiff_t dstStride,\ + ptrdiff_t srcStride)\ +{\ OPNAME ## rv30_tpel8_hhvv_lowpass(dst , src , dstStride, srcStride);\ OPNAME ## rv30_tpel8_hhvv_lowpass(dst+8, src+8, dstStride, srcStride);\ src += 8*srcStride;\ diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index 4220195..8a7bdee 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -42,7 +42,7 @@ #include "rv34data.h" #include "rv34.h" -static inline void ZERO8x2(void* dst, int stride) +static inline void ZERO8x2(void* dst, ptrdiff_t stride) { fill_rectangle(dst, 1, 2, stride, 0, 4); fill_rectangle(((uint8_t*)(dst))+4, 1, 2, stride, 0, 4); @@ -953,7 +953,9 @@ static const int ittrans16[4] = { /** * Perform 4x4 intra prediction. */ -static void rv34_pred_4x4_block(RV34DecContext *r, uint8_t *dst, int stride, int itype, int up, int left, int down, int right) +static void rv34_pred_4x4_block(RV34DecContext *r, uint8_t *dst, + ptrdiff_t stride, int itype, + int up, int left, int down, int right) { uint8_t *prev = dst - stride + 4; uint32_t topleft; @@ -997,7 +999,7 @@ static inline int adjust_pred16(int itype, int up, int left) } static inline void rv34_process_block(RV34DecContext *r, - uint8_t *pdst, int stride, + uint8_t *pdst, ptrdiff_t stride, int fc, int sc, int q_dc, int q_ac) { MpegEncContext *s = &r->s; @@ -1139,7 +1141,7 @@ static void rv34_output_intra(RV34DecContext *r, int8_t *intra_types, int cbp) } } -static int is_mv_diff_gt_3(int16_t (*motion_val)[2], int step) +static int is_mv_diff_gt_3(int16_t (*motion_val)[2], ptrdiff_t step) { int d; d = motion_val[0][0] - motion_val[-step][0]; diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h index 0ac24bf..b331722 100644 --- a/libavcodec/rv34.h +++ b/libavcodec/rv34.h @@ -86,7 +86,7 @@ 
typedef struct RV34DecContext{ RV34DSPContext rdsp; int8_t *intra_types_hist;///< old block types, used for prediction int8_t *intra_types; ///< block types - int intra_types_stride;///< block types array stride + ptrdiff_t intra_types_stride; ///< block types array stride const uint8_t *luma_dc_quant_i;///< luma subblock DC quantizer for intraframes const uint8_t *luma_dc_quant_p;///< luma subblock DC quantizer for interframes diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c index 0da1312..c55199a 100644 --- a/libavcodec/rv40.c +++ b/libavcodec/rv40.c @@ -299,7 +299,7 @@ static const int neighbour_offs_x[4] = { 0, 0, -1, 0 }; static const int neighbour_offs_y[4] = { 0, -1, 0, 1 }; static void rv40_adaptive_loop_filter(RV34DSPContext *rdsp, - uint8_t *src, int stride, int dmode, + uint8_t *src, ptrdiff_t stride, int dmode, int lim_q1, int lim_p1, int alpha, int beta, int beta2, int chroma, int edge, int dir) diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c index 4ca5cc7..64d9f2e 100644 --- a/libavcodec/rv40dsp.c +++ b/libavcodec/rv40dsp.c @@ -34,7 +34,8 @@ #include "rv34dsp.h" #define RV40_LOWPASS(OPNAME, OP) \ -static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\ +static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, \ + ptrdiff_t dstStride, ptrdiff_t srcStride, \ const int h, const int C1, const int C2, const int SHIFT){\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ int i;\ @@ -53,7 +54,8 @@ static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int }\ }\ \ -static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\ +static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, \ + ptrdiff_t dstStride, ptrdiff_t srcStride, \ const int w, const int C1, const int C2, const int SHIFT){\ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\ int i;\ @@ -85,7 +87,8 @@ static void OPNAME ## 
rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int }\ }\ \ -static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\ +static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, \ + ptrdiff_t dstStride, ptrdiff_t srcStride, \ const int w, const int C1, const int C2, const int SHIFT){\ OPNAME ## rv40_qpel8_v_lowpass(dst , src , dstStride, srcStride, 8, C1, C2, SHIFT);\ OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\ @@ -95,7 +98,8 @@ static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, in OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\ }\ \ -static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\ +static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, \ + ptrdiff_t dstStride, ptrdiff_t srcStride, \ const int h, const int C1, const int C2, const int SHIFT){\ OPNAME ## rv40_qpel8_h_lowpass(dst , src , dstStride, srcStride, 8, C1, C2, SHIFT);\ OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\ @@ -427,7 +431,7 @@ static const uint8_t rv40_dither_r[16] = { * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1 */ static av_always_inline void rv40_weak_loop_filter(uint8_t *src, - const int step, + const ptrdiff_t step, const ptrdiff_t stride, const int filter_p1, const int filter_q1, @@ -495,7 +499,7 @@ static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride, } static av_always_inline void rv40_strong_loop_filter(uint8_t *src, - const int step, + const ptrdiff_t step, const ptrdiff_t stride, const int alpha, const int lims, @@ -567,7 +571,7 @@ static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride, } static av_always_inline int rv40_loop_filter_strength(uint8_t *src, - int step, ptrdiff_t stride, + ptrdiff_t step, ptrdiff_t 
stride, int beta, int beta2, int edge, int *p1, int *q1) diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index 4d9c35b..8d21c78 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -63,7 +63,7 @@ rv34_idct dc %define IDCT_DC IDCT_DC_NOROUND rv34_idct dc_noround -; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc); +; ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc); INIT_MMX mmx cglobal rv34_idct_dc_add, 3, 3 ; calculate DC @@ -166,7 +166,7 @@ cglobal rv34_idct_add, 3,3,0, d, s, b COL_TRANSFORM [dq+sq], mm7, mm0, mm4 ret -; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc); +; ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc); INIT_XMM sse4 cglobal rv34_idct_dc_add, 3, 3, 6 ; load data diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index 77f6ddb..3a1f2b5 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -77,14 +77,11 @@ SECTION .text ;----------------------------------------------------------------------------- ; subpel MC functions: ; -; void ff_[put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride, -; uint8_t *src, int srcstride, -; int len, int m); +; void ff_[put|avg]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, ptrdiff_t deststride, +; uint8_t *src, ptrdiff_t srcstride, +; int len, ptrdiff_t m); ;---------------------------------------------------------------------- %macro LOAD 2 -%if WIN64 - movsxd %1q, %1d -%endif %ifdef PIC add %1q, picregq %else @@ -438,7 +435,7 @@ FILTER_SSSE3 avg %endmacro -; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride) +; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride) ; %1=size %2=num of xmm regs ; The weights are FP0.14 notation of fractions depending on pts. ; For timebases without rounding error (i.e. 
PAL), the fractions diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index 7bf3ecd..3384585 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -107,11 +107,11 @@ static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \ void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ const uint8_t *src, \ ptrdiff_t srcStride, \ - int len, int m); \ + int len, ptrdiff_t m); \ void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ const uint8_t *src, \ ptrdiff_t srcStride, \ - int len, int m); \ + int len, ptrdiff_t m); \ QPEL_FUNCS_DECL(OP, 0, 1, OPT) \ QPEL_FUNCS_DECL(OP, 0, 3, OPT) \ QPEL_FUNCS_DECL(OP, 1, 0, OPT) \ -- 2.7.3 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel