vlc/vlc-2.2 | branch: master | Felix Abecassis <[email protected]> | Fri May 22 15:32:39 2015 -0400| [d86a7e5df06cb6fed3c8826a6c61cdb03c95df43] | committer: Jean-Baptiste Kempf
sharpen: help compiler auto-vectorization

Refs #9458

Signed-off-by: Tristan Matthews <[email protected]>
(cherry picked from commit 32466e668505f25097e2811a563a19d16de5fbb7)
Signed-off-by: Jean-Baptiste Kempf <[email protected]>

> http://git.videolan.org/gitweb.cgi/vlc/vlc-2.2.git/?a=commit;h=d86a7e5df06cb6fed3c8826a6c61cdb03c95df43
---

 modules/video_filter/sharpen.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/modules/video_filter/sharpen.c b/modules/video_filter/sharpen.c
index 8714673..5311464 100644
--- a/modules/video_filter/sharpen.c
+++ b/modules/video_filter/sharpen.c
@@ -171,13 +171,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
 {
     picture_t *p_outpic;
     int i, j;
-    uint8_t *p_src = NULL;
-    uint8_t *p_out = NULL;
+    uint8_t *restrict p_src = NULL;
+    uint8_t *restrict p_out = NULL;
     int i_src_pitch;
     int i_out_pitch;
     int pix;
     const int v1 = -1;
     const int v2 = 3; /* 2^3 = 8 */
+    const unsigned i_visible_lines = p_pic->p[Y_PLANE].i_visible_lines;
+    const unsigned i_visible_pitch = p_pic->p[Y_PLANE].i_visible_pitch;
+    const int sigma = var_GetFloat( p_filter, FILTER_PREFIX "sigma" ) * (1 << 20);
 
     if( !p_pic ) return NULL;
 
@@ -196,22 +199,15 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
 
     /* perform convolution only on Y plane. Avoid border line. */
     vlc_mutex_lock( &p_filter->p_sys->lock );
-    for( i = 0; i < p_pic->p[Y_PLANE].i_visible_lines; i++ )
+
+    memcpy(p_out, p_src, i_visible_pitch);
+
+    for( i = 1; i < i_visible_lines - 1; i++ )
     {
-        if( (i == 0) || (i == p_pic->p[Y_PLANE].i_visible_lines - 1) )
-        {
-            for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
-                p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] );
-            continue ;
-        }
-        for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
-        {
-            if( (j == 0) || (j == p_pic->p[Y_PLANE].i_visible_pitch - 1) )
-            {
-                p_out[i * i_out_pitch + j] = p_src[i * i_src_pitch + j];
-                continue ;
-            }
+        p_out[i * i_out_pitch] = p_src[i * i_src_pitch];
 
+        for( j = 1; j < i_visible_pitch - 1; j++ )
+        {
             pix = (p_src[(i - 1) * i_src_pitch + j - 1] * v1) +
                   (p_src[(i - 1) * i_src_pitch + j    ] * v1) +
                   (p_src[(i - 1) * i_src_pitch + j + 1] * v1) +
@@ -223,10 +219,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
                   (p_src[(i + 1) * i_src_pitch + j + 1] * v1);
 
             pix = pix >= 0 ? clip(pix) : -clip(pix * -1);
-            p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] +
-                p_filter->p_sys->tab_precalc[pix + 256] );
+            p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] +
+                                               ((pix * sigma) >> 20));
         }
+
+        p_out[i * i_out_pitch + i_visible_pitch - 1] =
+            p_src[i * i_src_pitch + i_visible_pitch - 1];
     }
+    memcpy(&p_out[(i_visible_lines - 1) * i_out_pitch],
+           &p_src[(i_visible_lines - 1) * i_src_pitch], i_visible_pitch);
+
     vlc_mutex_unlock( &p_filter->p_sys->lock );
 
     plane_CopyPixels( &p_outpic->p[U_PLANE], &p_pic->p[U_PLANE] );

_______________________________________________
vlc-commits mailing list
[email protected]
https://mailman.videolan.org/listinfo/vlc-commits
