On 2011-12-10 15:43:54 +0000, Mans Rullgard wrote:
> This splits the loop filter functions into smaller, more SIMD-friendly
> functions.
> 
> Signed-off-by: Mans Rullgard <[email protected]>
> ---
>  libavcodec/rv34dsp.h |   21 +++-
>  libavcodec/rv40.c    |   69 ++++++++----
>  libavcodec/rv40dsp.c |  296 
> ++++++++++++++++++++++++++++++--------------------
>  3 files changed, 240 insertions(+), 146 deletions(-)
> 
> diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h
> index 695af06..f2bc20e 100644
> --- a/libavcodec/rv34dsp.h
> +++ b/libavcodec/rv34dsp.h
> @@ -36,10 +36,18 @@ typedef void (*rv40_weight_func)(uint8_t *dst/*align 
> width (8 or 16)*/,
>  
>  typedef void (*rv34_inv_transform_func)(DCTELEM *block);
>  
> -typedef void (*rv40_loop_filter_func)(uint8_t *src, int stride, int dmode,
> -                                      int lim_q1, int lim_p1, int alpha,
> -                                      int beta, int beta2, int chroma,
> -                                      int edge);
> +typedef void (*rv40_weak_loop_filter_func)(uint8_t *src, int stride,
> +                                           int filter_p1, int filter_q1,
> +                                           int alpha, int beta,
> +                                           int lims, int lim_q1, int lim_p1);
> +
> +typedef void (*rv40_strong_loop_filter_func)(uint8_t *src, int stride,
> +                                             int alpha, int lims,
> +                                             int dmode, int chroma);
> +
> +typedef int (*rv40_loop_filter_strength_func)(uint8_t *src, int stride,
> +                                              int beta, int beta2, int edge,
> +                                              int *p1, int *q1);
>  
>  typedef struct RV34DSPContext {
>      qpel_mc_func put_pixels_tab[4][16];
> @@ -48,8 +56,9 @@ typedef struct RV34DSPContext {
>      h264_chroma_mc_func avg_chroma_pixels_tab[3];
>      rv40_weight_func rv40_weight_pixels_tab[2];
>      rv34_inv_transform_func rv34_inv_transform_tab[2];
> -    rv40_loop_filter_func rv40_h_loop_filter;
> -    rv40_loop_filter_func rv40_v_loop_filter;
> +    rv40_weak_loop_filter_func rv40_weak_loop_filter[2];
> +    rv40_strong_loop_filter_func rv40_strong_loop_filter[2];
> +    rv40_loop_filter_strength_func rv40_loop_filter_strength[2];
>  } RV34DSPContext;
>  
>  void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp);
> diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
> index 3216e83..bde63e1 100644
> --- a/libavcodec/rv40.c
> +++ b/libavcodec/rv40.c
> @@ -294,6 +294,34 @@ enum RV40BlockPos{
>  static const int neighbour_offs_x[4] = { 0,  0, -1, 0 };
>  static const int neighbour_offs_y[4] = { 0, -1,  0, 1 };
>  
> +static void rv40_adaptive_loop_filter(RV34DSPContext *rdsp,
> +                                      uint8_t *src, int stride, int dmode,
> +                                      int lim_q1, int lim_p1,
> +                                      int alpha, int beta, int beta2,
> +                                      int chroma, int edge, int dir)
> +{
> +    int filter_p1, filter_q1;
> +    int strong;
> +    int lims;
> +
> +    strong = rdsp->rv40_loop_filter_strength[dir](src, stride, beta, beta2,
> +                                                  edge, &filter_p1, 
> &filter_q1);
> +
> +    lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1;
> +
> +    if (strong) {
> +        rdsp->rv40_strong_loop_filter[dir](src, stride, alpha,
> +                                           lims, dmode, chroma);
> +    } else if (filter_p1 & filter_q1) {
> +        rdsp->rv40_weak_loop_filter[dir](src, stride, 1, 1, alpha, beta,
> +                                         lims, lim_q1, lim_p1);
> +    } else if (filter_p1 | filter_q1) {
> +        rdsp->rv40_weak_loop_filter[dir](src, stride, filter_p1, filter_q1,
> +                                         alpha, beta, lims >> 1, lim_q1 >> 1,
> +                                         lim_p1 >> 1);
> +    }
> +}
> +
>  /**
>   * RV40 loop filtering function
>   */
> @@ -430,10 +458,11 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
>                  // if bottom block is coded then we can filter its top edge
>                  // (or bottom edge of this block, which is the same)
>                  if(y_h_deblock & (MASK_BOTTOM << ij)){
> -                    r->rdsp.rv40_h_loop_filter(Y+4*s->linesize, s->linesize, 
> dither,
> -                                       y_to_deblock & (MASK_BOTTOM << ij) ? 
> clip[POS_CUR] : 0,
> -                                       clip_cur,
> -                                       alpha, beta, betaY, 0, 0);
> +                    rv40_adaptive_loop_filter(&r->rdsp, Y+4*s->linesize,
> +                                              s->linesize, dither,
> +                                              y_to_deblock & (MASK_BOTTOM << 
> ij) ? clip[POS_CUR] : 0,
> +                                              clip_cur, alpha, beta, betaY,
> +                                              0, 0, 0);
>                  }
>                  // filter left block edge in ordinary mode (with low 
> filtering strength)
>                  if(y_v_deblock & (MASK_CUR << ij) && (i || 
> !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
> @@ -441,25 +470,25 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
>                          clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? 
> clip[POS_LEFT] : 0;
>                      else
>                          clip_left = y_to_deblock & (MASK_CUR << (ij-1)) ? 
> clip[POS_CUR] : 0;
> -                    r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither,
> -                                       clip_cur,
> -                                       clip_left,
> -                                       alpha, beta, betaY, 0, 0);
> +                    rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, 
> dither,
> +                                              clip_cur,
> +                                              clip_left,
> +                                              alpha, beta, betaY, 0, 0, 1);
>                  }
>                  // filter top edge of the current macroblock when filtering 
> strength is high
>                  if(!j && y_h_deblock & (MASK_CUR << i) && 
> (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
> -                    r->rdsp.rv40_h_loop_filter(Y, s->linesize, dither,
> +                    rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, 
> dither,
>                                         clip_cur,
>                                         mvmasks[POS_TOP] & (MASK_TOP << i) ? 
> clip[POS_TOP] : 0,
> -                                       alpha, beta, betaY, 0, 1);
> +                                       alpha, beta, betaY, 0, 1, 0);
>                  }
>                  // filter left block edge in edge mode (with high filtering 
> strength)
>                  if(y_v_deblock & (MASK_CUR << ij) && !i && 
> (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
>                      clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? 
> clip[POS_LEFT] : 0;
> -                    r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither,
> +                    rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, 
> dither,
>                                         clip_cur,
>                                         clip_left,
> -                                       alpha, beta, betaY, 0, 1);
> +                                       alpha, beta, betaY, 0, 1, 1);
>                  }
>              }
>          }
> @@ -471,34 +500,34 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
>                      int clip_cur = c_to_deblock[k] & (MASK_CUR << ij) ? 
> clip[POS_CUR] : 0;
>                      if(c_h_deblock[k] & (MASK_CUR << (ij+2))){
>                          int clip_bot = c_to_deblock[k] & (MASK_CUR << 
> (ij+2)) ? clip[POS_CUR] : 0;
> -                        r->rdsp.rv40_h_loop_filter(C+4*s->uvlinesize, 
> s->uvlinesize, i*8,
> +                        rv40_adaptive_loop_filter(&r->rdsp, 
> C+4*s->uvlinesize, s->uvlinesize, i*8,
>                                             clip_bot,
>                                             clip_cur,
> -                                           alpha, beta, betaC, 1, 0);
> +                                           alpha, beta, betaC, 1, 0, 0);
>                      }
>                      if((c_v_deblock[k] & (MASK_CUR << ij)) && (i || 
> !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
>                          if(!i)
>                              clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << 
> (2*j+1)) ? clip[POS_LEFT] : 0;
>                          else
>                              clip_left = c_to_deblock[k]    & (MASK_CUR << 
> (ij-1))  ? clip[POS_CUR]  : 0;
> -                        r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8,
> +                        rv40_adaptive_loop_filter(&r->rdsp, C, 
> s->uvlinesize, j*8,
>                                             clip_cur,
>                                             clip_left,
> -                                           alpha, beta, betaC, 1, 0);
> +                                           alpha, beta, betaC, 1, 0, 1);
>                      }
>                      if(!j && c_h_deblock[k] & (MASK_CUR << ij) && 
> (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
>                          int clip_top = uvcbp[POS_TOP][k] & (MASK_CUR << 
> (ij+2)) ? clip[POS_TOP] : 0;
> -                        r->rdsp.rv40_h_loop_filter(C, s->uvlinesize, i*8,
> +                        rv40_adaptive_loop_filter(&r->rdsp, C, 
> s->uvlinesize, i*8,
>                                             clip_cur,
>                                             clip_top,
> -                                           alpha, beta, betaC, 1, 1);
> +                                           alpha, beta, betaC, 1, 1, 0);
>                      }
>                      if(c_v_deblock[k] & (MASK_CUR << ij) && !i && 
> (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
>                          clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << 
> (2*j+1)) ? clip[POS_LEFT] : 0;
> -                        r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8,
> +                        rv40_adaptive_loop_filter(&r->rdsp, C, 
> s->uvlinesize, j*8,
>                                             clip_cur,
>                                             clip_left,
> -                                           alpha, beta, betaC, 1, 1);
> +                                           alpha, beta, betaC, 1, 1, 1);

All the rv40_adaptive_loop_filter calls could be indented more nicely and
compactly. Touching only the modified lines would be OK too, but please
be consistent.

>                      }
>                  }
>              }
> diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
> index 06bdf18..909ccd1 100644
> --- a/libavcodec/rv40dsp.c
> +++ b/libavcodec/rv40dsp.c
> @@ -314,142 +314,194 @@ static const uint8_t rv40_dither_r[16] = {
>  /**
>   * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
>   */
> -static inline void rv40_weak_loop_filter(uint8_t *src, const int step,
> -                                         const int filter_p1, const int 
> filter_q1,
> -                                         const int alpha, const int beta,
> -                                         const int lim_p0q0,
> -                                         const int lim_q1, const int lim_p1,
> -                                         const int diff_p1p0, const int 
> diff_q1q0,
> -                                         const int diff_p1p2, const int 
> diff_q1q2)
> +static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
> +                                                   const int step,
> +                                                   const int stride,
> +                                                   const int filter_p1,
> +                                                   const int filter_q1,
> +                                                   const int alpha,
> +                                                   const int beta,
> +                                                   const int lim_p0q0,
> +                                                   const int lim_q1,
> +                                                   const int lim_p1)
>  {
>      uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
> -    int t, u, diff;
> -
> -    t = src[0*step] - src[-1*step];
> -    if(!t)
> -        return;
> -    u = (alpha * FFABS(t)) >> 7;
> -    if(u > 3 - (filter_p1 && filter_q1))
> -        return;
> -
> -    t <<= 2;
> -    if(filter_p1 && filter_q1)
> -        t += src[-2*step] - src[1*step];
> -    diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
> -    src[-1*step] = cm[src[-1*step] + diff];
> -    src[ 0*step] = cm[src[ 0*step] - diff];
> -    if(FFABS(diff_p1p2) <= beta && filter_p1){
> -        t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
> -        src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
> +    int i, t, u, diff;
> +
> +    for (i = 0; i < 4; i++, src += stride) {
> +        int diff_p1p0 = src[-2*step] - src[-1*step];
> +        int diff_q1q0 = src[ 1*step] - src[ 0*step];
> +        int diff_p1p2 = src[-2*step] - src[-3*step];
> +        int diff_q1q2 = src[ 1*step] - src[ 2*step];
> +
> +        t = src[0*step] - src[-1*step];
> +        if (!t)
> +            continue;
> +
> +        u = (alpha * FFABS(t)) >> 7;
> +        if (u > 3 - (filter_p1 && filter_q1))
> +            continue;
> +
> +        t <<= 2;
> +        if (filter_p1 && filter_q1)
> +            t += src[-2*step] - src[1*step];
> +
> +        diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
> +        src[-1*step] = cm[src[-1*step] + diff];
> +        src[ 0*step] = cm[src[ 0*step] - diff];
> +
> +        if (filter_p1 && FFABS(diff_p1p2) <= beta) {
> +            t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
> +            src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
> +        }
> +
> +        if (filter_q1 && FFABS(diff_q1q2) <= beta) {
> +            t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
> +            src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
> +        }
>      }
> -    if(FFABS(diff_q1q2) <= beta && filter_q1){
> -        t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
> -        src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
> +}
> +
> +static void rv40_h_weak_loop_filter(uint8_t *src, const int stride,
> +                                    const int filter_p1, const int filter_q1,
> +                                    const int alpha, const int beta,
> +                                    const int lim_p0q0, const int lim_q1,
> +                                    const int lim_p1)
> +{
> +    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
> +                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
> +}
> +
> +static void rv40_v_weak_loop_filter(uint8_t *src, const int stride,
> +                                    const int filter_p1, const int filter_q1,
> +                                    const int alpha, const int beta,
> +                                    const int lim_p0q0, const int lim_q1,
> +                                    const int lim_p1)
> +{
> +    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
> +                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
> +}
> +
> +static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
> +                                                     const int step,
> +                                                     const int stride,
> +                                                     const int alpha,
> +                                                     const int lims,
> +                                                     const int dmode,
> +                                                     const int chroma)
> +{
> +    int i;
> +
> +    for(i = 0; i < 4; i++, src += stride){

Missing spaces after "for" and after ")".

> +        int sflag, p0, q0, p1, q1;
> +        int t = src[0*step] - src[-1*step];
> +
> +        if (!t)
> +            continue;
> +
> +        sflag = (alpha * FFABS(t)) >> 7;
> +        if (sflag > 1)
> +            continue;
> +
> +        p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
> +              26*src[ 0*step] + 25*src[ 1*step] +
> +              rv40_dither_l[dmode + i]) >> 7;
> +
> +        q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
> +              26*src[ 1*step] + 25*src[ 2*step] +
> +              rv40_dither_r[dmode + i]) >> 7;
> +
> +        if (sflag) {
> +            p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
> +            q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
> +        }
> +
> +        p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
> +              25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
> +        q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
> +              25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
> +
> +        if (sflag) {
> +            p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
> +            q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
> +        }
> +
> +        src[-2*step] = p1;
> +        src[-1*step] = p0;
> +        src[ 0*step] = q0;
> +        src[ 1*step] = q1;
> +
> +        if(!chroma){

Missing spaces after "if" and before "{".

> +            src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
> +                            51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
> +            src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
> +                            51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
> +        }
>      }
>  }
>  
> -static av_always_inline void rv40_adaptive_loop_filter(uint8_t *src, const 
> int step,
> -                                             const int stride, const int 
> dmode,
> -                                             const int lim_q1, const int 
> lim_p1,
> -                                             const int alpha,
> -                                             const int beta, const int beta2,
> -                                             const int chroma, const int 
> edge)
> +static void rv40_h_strong_loop_filter(uint8_t *src, const int stride,
> +                                      const int alpha, const int lims,
> +                                      const int dmode, const int chroma)
> +{
> +    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
> +}
> +
> +static void rv40_v_strong_loop_filter(uint8_t *src, const int stride,
> +                                      const int alpha, const int lims,
> +                                      const int dmode, const int chroma)
> +{
> +    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
> +}
> +
> +static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
> +                                                      int step, int stride,
> +                                                      int beta, int beta2,
> +                                                      int edge,
> +                                                      int *p1, int *q1)
>  {
> -    int diff_p1p0[4], diff_q1q0[4], diff_p1p2[4], diff_q1q2[4];
>      int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
> +    int strong0 = 0, strong1 = 0;

Diego might want vertical alignment here

>      uint8_t *ptr;
> -    int flag_strong0 = 1, flag_strong1 = 1;
> -    int filter_p1, filter_q1;
>      int i;
> -    int lims;
>  
> -    for(i = 0, ptr = src; i < 4; i++, ptr += stride){
> -        diff_p1p0[i] = ptr[-2*step] - ptr[-1*step];
> -        diff_q1q0[i] = ptr[ 1*step] - ptr[ 0*step];
> -        sum_p1p0 += diff_p1p0[i];
> -        sum_q1q0 += diff_q1q0[i];
> -    }
> -    filter_p1 = FFABS(sum_p1p0) < (beta<<2);
> -    filter_q1 = FFABS(sum_q1q0) < (beta<<2);
> -    if(!filter_p1 && !filter_q1)
> -        return;
> -
> -    for(i = 0, ptr = src; i < 4; i++, ptr += stride){
> -        diff_p1p2[i] = ptr[-2*step] - ptr[-3*step];
> -        diff_q1q2[i] = ptr[ 1*step] - ptr[ 2*step];
> -        sum_p1p2 += diff_p1p2[i];
> -        sum_q1q2 += diff_q1q2[i];
> +    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
> +        sum_p1p0 += ptr[-2*step] - ptr[-1*step];
> +        sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
>      }
>  
> -    if(edge){
> -        flag_strong0 = filter_p1 && (FFABS(sum_p1p2) < beta2);
> -        flag_strong1 = filter_q1 && (FFABS(sum_q1q2) < beta2);
> -    }else{
> -        flag_strong0 = flag_strong1 = 0;
> -    }
> +    *p1 = FFABS(sum_p1p0) < (beta << 2);
> +    *q1 = FFABS(sum_q1q0) < (beta << 2);
>  
> -    lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1;
> -    if(flag_strong0 && flag_strong1){ /* strong filtering */
> -        for(i = 0; i < 4; i++, src += stride){
> -            int sflag, p0, q0, p1, q1;
> -            int t = src[0*step] - src[-1*step];
> -
> -            if(!t) continue;
> -            sflag = (alpha * FFABS(t)) >> 7;
> -            if(sflag > 1) continue;
> -
> -            p0 = (25*src[-3*step] + 26*src[-2*step]
> -                + 26*src[-1*step]
> -                + 26*src[ 0*step] + 25*src[ 1*step] + rv40_dither_l[dmode + 
> i]) >> 7;
> -            q0 = (25*src[-2*step] + 26*src[-1*step]
> -                + 26*src[ 0*step]
> -                + 26*src[ 1*step] + 25*src[ 2*step] + rv40_dither_r[dmode + 
> i]) >> 7;
> -            if(sflag){
> -                p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
> -                q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
> -            }
> -            p1 = (25*src[-4*step] + 26*src[-3*step]
> -                + 26*src[-2*step]
> -                + 26*p0           + 25*src[ 0*step] + rv40_dither_l[dmode + 
> i]) >> 7;
> -            q1 = (25*src[-1*step] + 26*q0
> -                + 26*src[ 1*step]
> -                + 26*src[ 2*step] + 25*src[ 3*step] + rv40_dither_r[dmode + 
> i]) >> 7;
> -            if(sflag){
> -                p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
> -                q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
> -            }
> -            src[-2*step] = p1;
> -            src[-1*step] = p0;
> -            src[ 0*step] = q0;
> -            src[ 1*step] = q1;
> -            if(!chroma){
> -                src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + 
> 51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
> -                src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + 51*src[ 
> 2*step] + 26*src[ 3*step] + 64) >> 7;
> -            }
> -        }
> -    }else if(filter_p1 && filter_q1){
> -        for(i = 0; i < 4; i++, src += stride)
> -            rv40_weak_loop_filter(src, step, 1, 1, alpha, beta, lims, 
> lim_q1, lim_p1,
> -                                  diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], 
> diff_q1q2[i]);
> -    }else{
> -        for(i = 0; i < 4; i++, src += stride)
> -            rv40_weak_loop_filter(src, step, filter_p1, filter_q1,
> -                                  alpha, beta, lims>>1, lim_q1>>1, lim_p1>>1,
> -                                  diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], 
> diff_q1q2[i]);
> +    if(!*p1 && !*q1)
> +        return 0;
> +
> +    if (!edge)
> +        return 0;

Maybe join this with the previous if, unless you think that would be too cluttered.

> +
> +    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
> +        sum_p1p2 += ptr[-2*step] - ptr[-3*step];
> +        sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
>      }
> +
> +    strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
> +    strong1 = *q1 && (FFABS(sum_q1q2) < beta2);
> +
> +    return strong0 && strong1;
>  }
>  
> -static void rv40_v_loop_filter(uint8_t *src, int stride, int dmode,
> -                               int lim_q1, int lim_p1,
> -                               int alpha, int beta, int beta2, int chroma, 
> int edge){
> -    rv40_adaptive_loop_filter(src, 1, stride, dmode, lim_q1, lim_p1,
> -                              alpha, beta, beta2, chroma, edge);
> +static int rv40_h_loop_filter_strength(uint8_t *src, int stride,
> +                                       int beta, int beta2, int edge,
> +                                       int *p1, int *q1)
> +{
> +    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, 
> q1);
>  }
> -static void rv40_h_loop_filter(uint8_t *src, int stride, int dmode,
> -                               int lim_q1, int lim_p1,
> -                               int alpha, int beta, int beta2, int chroma, 
> int edge){
> -    rv40_adaptive_loop_filter(src, stride, 1, dmode, lim_q1, lim_p1,
> -                              alpha, beta, beta2, chroma, edge);
> +
> +static int rv40_v_loop_filter_strength(uint8_t *src, int stride,
> +                                       int beta, int beta2, int edge,
> +                                       int *p1, int *q1)
> +{
> +    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, 
> q1);
>  }
>  
>  av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
> @@ -529,8 +581,12 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, 
> DSPContext* dsp) {
>      c->rv40_weight_pixels_tab[0] = rv40_weight_func_16;
>      c->rv40_weight_pixels_tab[1] = rv40_weight_func_8;
>  
> -    c->rv40_h_loop_filter = rv40_h_loop_filter;
> -    c->rv40_v_loop_filter = rv40_v_loop_filter;
> +    c->rv40_weak_loop_filter[0]     = rv40_h_weak_loop_filter;
> +    c->rv40_weak_loop_filter[1]     = rv40_v_weak_loop_filter;
> +    c->rv40_strong_loop_filter[0]   = rv40_h_strong_loop_filter;
> +    c->rv40_strong_loop_filter[1]   = rv40_v_strong_loop_filter;
> +    c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
> +    c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
>  
>      if (HAVE_MMX)
>          ff_rv40dsp_init_x86(c, dsp);

otherwise ok

Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to