Re: [FFmpeg-devel] [PATCH 2/9] lavfi/nlmeans: add SIMD-friendly assumptions for compute_safe_ssd_integral_image

2018-05-07 Thread Clément Bœsch
On Mon, May 07, 2018 at 12:14:37AM +0200, Michael Niedermayer wrote:
> On Sun, May 06, 2018 at 01:40:53PM +0200, Clément Bœsch wrote:
> > SIMD code will not have to deal with padding itself. Overwriting in that
> > function may have been possible but involve large overreading of the
> > sources. Instead, we simply make sure the width to process is always a
> > multiple of 16. Additionally, there must be some actual area to process
> > so the SIMD code can have its boundary checks after processing the first
> > pixels.
> > ---
> >  libavfilter/vf_nlmeans.c | 25 ++---
> >  1 file changed, 18 insertions(+), 7 deletions(-)
> > 
> > diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
> > index d222d3913e..21f981a605 100644
> > --- a/libavfilter/vf_nlmeans.c
> > +++ b/libavfilter/vf_nlmeans.c
> > @@ -157,6 +157,9 @@ static void compute_safe_ssd_integral_image_c(uint32_t 
> > *dst, int dst_linesize_32
> >  {
> >  int x, y;
> >  
> > +/* SIMD-friendly assumptions allowed here */
> > +av_assert2(!(w & 0xf) && w >= 16 && h >= 1);
> > +
> >  for (y = 0; y < h; y++) {
> >  uint32_t acc = dst[-1] - dst[-dst_linesize_32 - 1];
> >  
> > @@ -257,9 +260,16 @@ static void compute_ssd_integral_image(uint32_t *ii, 
> > int ii_linesize_32,
> >  // to compare the 2 sources pixels
> >  const int startx_safe = FFMAX(s1x, s2x);
> >  const int starty_safe = FFMAX(s1y, s2y);
> > -const int endx_safe   = FFMIN(s1x + w, s2x + w);
> > +const int u_endx_safe = FFMIN(s1x + w, s2x + w); // unaligned
> >  const int endy_safe   = FFMIN(s1y + h, s2y + h);
> >  
> > +// deduce the safe area width and height
> > +const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
> > +const int safe_ph = endy_safe - starty_safe;
> > +
> > +// adjusted end x position of the safe area after width of the safe 
> > area gets aligned
> > +const int endx_safe = startx_safe + safe_pw;
> > +
> >  // top part where only one of s1 and s2 is still readable, or none at 
> > all
> >  compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
> >0, 0,
> > @@ -273,24 +283,25 @@ static void compute_ssd_integral_image(uint32_t *ii, 
> > int ii_linesize_32,
> >0, starty_safe,
> >src, linesize,
> >offx, offy, e, w, h,
> > -  startx_safe, endy_safe - 
> > starty_safe);
> > +  startx_safe, safe_ph);
> >  
> >  // main and safe part of the integral
> >  av_assert1(startx_safe - s1x >= 0); av_assert1(startx_safe - s1x < w);
> >  av_assert1(starty_safe - s1y >= 0); av_assert1(starty_safe - s1y < h);
> >  av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w);
> >  av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h);
> > -compute_safe_ssd_integral_image_c(ii + starty_safe*ii_linesize_32 + 
> > startx_safe, ii_linesize_32,
> > -  src + (starty_safe - s1y) * linesize 
> > + (startx_safe - s1x), linesize,
> > -  src + (starty_safe - s2y) * linesize 
> > + (startx_safe - s2x), linesize,
> > -  endx_safe - startx_safe, endy_safe - 
> > starty_safe);
> > +if (safe_pw && safe_ph)
> > +dsp->compute_safe_ssd_integral_image(ii + 
> > starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
> > + src + (starty_safe - s1y) * 
> > linesize + (startx_safe - s1x), linesize,
> > + src + (starty_safe - s2y) * 
> > linesize + (startx_safe - s2x), linesize,
> > + safe_pw, safe_ph);
> 
> 
> i think this is or i am missing some change
> 
> libavfilter/vf_nlmeans.c: In function ‘compute_ssd_integral_image’:
> libavfilter/vf_nlmeans.c:294:9: error: ‘dsp’ undeclared (first use in this 
> function)
>  dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 
> + startx_safe, ii_linesize_32,
>  ^
> libavfilter/vf_nlmeans.c:294:9: note: each undeclared identifier is reported 
> only once for each function it appears in
> libavfilter/vf_nlmeans.c: At top level:
> libavfilter/vf_nlmeans.c:153:13: warning: ‘compute_safe_ssd_integral_image_c’ 
> defined but not used [-Wunused-function]
>  static void compute_safe_ssd_integral_image_c(uint32_t *dst, int 
> dst_linesize_32,
>  ^
> make: *** [libavfilter/vf_nlmeans.o] Error 1
> make: *** Waiting for unfinished jobs

Yeah I made a mistake while splitting commit, this is fixed locally. At
this step it's supposed to still be calling
compute_safe_ssd_integral_image_c() directly (but its last 2 parameters
changed).

-- 
Clément B.


signature.asc
Description: PGP signature
__

Re: [FFmpeg-devel] [PATCH 2/9] lavfi/nlmeans: add SIMD-friendly assumptions for compute_safe_ssd_integral_image

2018-05-06 Thread Michael Niedermayer
On Sun, May 06, 2018 at 01:40:53PM +0200, Clément Bœsch wrote:
> SIMD code will not have to deal with padding itself. Overwriting in that
> function may have been possible but involve large overreading of the
> sources. Instead, we simply make sure the width to process is always a
> multiple of 16. Additionally, there must be some actual area to process
> so the SIMD code can have its boundary checks after processing the first
> pixels.
> ---
>  libavfilter/vf_nlmeans.c | 25 ++---
>  1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
> index d222d3913e..21f981a605 100644
> --- a/libavfilter/vf_nlmeans.c
> +++ b/libavfilter/vf_nlmeans.c
> @@ -157,6 +157,9 @@ static void compute_safe_ssd_integral_image_c(uint32_t 
> *dst, int dst_linesize_32
>  {
>  int x, y;
>  
> +/* SIMD-friendly assumptions allowed here */
> +av_assert2(!(w & 0xf) && w >= 16 && h >= 1);
> +
>  for (y = 0; y < h; y++) {
>  uint32_t acc = dst[-1] - dst[-dst_linesize_32 - 1];
>  
> @@ -257,9 +260,16 @@ static void compute_ssd_integral_image(uint32_t *ii, int 
> ii_linesize_32,
>  // to compare the 2 sources pixels
>  const int startx_safe = FFMAX(s1x, s2x);
>  const int starty_safe = FFMAX(s1y, s2y);
> -const int endx_safe   = FFMIN(s1x + w, s2x + w);
> +const int u_endx_safe = FFMIN(s1x + w, s2x + w); // unaligned
>  const int endy_safe   = FFMIN(s1y + h, s2y + h);
>  
> +// deduce the safe area width and height
> +const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
> +const int safe_ph = endy_safe - starty_safe;
> +
> +// adjusted end x position of the safe area after width of the safe area 
> gets aligned
> +const int endx_safe = startx_safe + safe_pw;
> +
>  // top part where only one of s1 and s2 is still readable, or none at all
>  compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
>0, 0,
> @@ -273,24 +283,25 @@ static void compute_ssd_integral_image(uint32_t *ii, 
> int ii_linesize_32,
>0, starty_safe,
>src, linesize,
>offx, offy, e, w, h,
> -  startx_safe, endy_safe - starty_safe);
> +  startx_safe, safe_ph);
>  
>  // main and safe part of the integral
>  av_assert1(startx_safe - s1x >= 0); av_assert1(startx_safe - s1x < w);
>  av_assert1(starty_safe - s1y >= 0); av_assert1(starty_safe - s1y < h);
>  av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w);
>  av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h);
> -compute_safe_ssd_integral_image_c(ii + starty_safe*ii_linesize_32 + 
> startx_safe, ii_linesize_32,
> -  src + (starty_safe - s1y) * linesize + 
> (startx_safe - s1x), linesize,
> -  src + (starty_safe - s2y) * linesize + 
> (startx_safe - s2x), linesize,
> -  endx_safe - startx_safe, endy_safe - 
> starty_safe);
> +if (safe_pw && safe_ph)
> +dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 
> + startx_safe, ii_linesize_32,
> + src + (starty_safe - s1y) * 
> linesize + (startx_safe - s1x), linesize,
> + src + (starty_safe - s2y) * 
> linesize + (startx_safe - s2x), linesize,
> + safe_pw, safe_ph);


i think this is or i am missing some change

libavfilter/vf_nlmeans.c: In function ‘compute_ssd_integral_image’:
libavfilter/vf_nlmeans.c:294:9: error: ‘dsp’ undeclared (first use in this 
function)
 dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + 
startx_safe, ii_linesize_32,
 ^
libavfilter/vf_nlmeans.c:294:9: note: each undeclared identifier is reported 
only once for each function it appears in
libavfilter/vf_nlmeans.c: At top level:
libavfilter/vf_nlmeans.c:153:13: warning: ‘compute_safe_ssd_integral_image_c’ 
defined but not used [-Wunused-function]
 static void compute_safe_ssd_integral_image_c(uint32_t *dst, int 
dst_linesize_32,
 ^
make: *** [libavfilter/vf_nlmeans.o] Error 1
make: *** Waiting for unfinished jobs

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Everything should be made as simple as possible, but not simpler.
-- Albert Einstein


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/9] lavfi/nlmeans: add SIMD-friendly assumptions for compute_safe_ssd_integral_image

2018-05-06 Thread Clément Bœsch
SIMD code will not have to deal with padding itself. Overwriting in that
function may have been possible but involve large overreading of the
sources. Instead, we simply make sure the width to process is always a
multiple of 16. Additionally, there must be some actual area to process
so the SIMD code can have its boundary checks after processing the first
pixels.
---
 libavfilter/vf_nlmeans.c | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
index d222d3913e..21f981a605 100644
--- a/libavfilter/vf_nlmeans.c
+++ b/libavfilter/vf_nlmeans.c
@@ -157,6 +157,9 @@ static void compute_safe_ssd_integral_image_c(uint32_t 
*dst, int dst_linesize_32
 {
 int x, y;
 
+/* SIMD-friendly assumptions allowed here */
+av_assert2(!(w & 0xf) && w >= 16 && h >= 1);
+
 for (y = 0; y < h; y++) {
 uint32_t acc = dst[-1] - dst[-dst_linesize_32 - 1];
 
@@ -257,9 +260,16 @@ static void compute_ssd_integral_image(uint32_t *ii, int 
ii_linesize_32,
 // to compare the 2 sources pixels
 const int startx_safe = FFMAX(s1x, s2x);
 const int starty_safe = FFMAX(s1y, s2y);
-const int endx_safe   = FFMIN(s1x + w, s2x + w);
+const int u_endx_safe = FFMIN(s1x + w, s2x + w); // unaligned
 const int endy_safe   = FFMIN(s1y + h, s2y + h);
 
+// deduce the safe area width and height
+const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
+const int safe_ph = endy_safe - starty_safe;
+
+// adjusted end x position of the safe area after width of the safe area 
gets aligned
+const int endx_safe = startx_safe + safe_pw;
+
 // top part where only one of s1 and s2 is still readable, or none at all
 compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
   0, 0,
@@ -273,24 +283,25 @@ static void compute_ssd_integral_image(uint32_t *ii, int 
ii_linesize_32,
   0, starty_safe,
   src, linesize,
   offx, offy, e, w, h,
-  startx_safe, endy_safe - starty_safe);
+  startx_safe, safe_ph);
 
 // main and safe part of the integral
 av_assert1(startx_safe - s1x >= 0); av_assert1(startx_safe - s1x < w);
 av_assert1(starty_safe - s1y >= 0); av_assert1(starty_safe - s1y < h);
 av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w);
 av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h);
-compute_safe_ssd_integral_image_c(ii + starty_safe*ii_linesize_32 + 
startx_safe, ii_linesize_32,
-  src + (starty_safe - s1y) * linesize + 
(startx_safe - s1x), linesize,
-  src + (starty_safe - s2y) * linesize + 
(startx_safe - s2x), linesize,
-  endx_safe - startx_safe, endy_safe - 
starty_safe);
+if (safe_pw && safe_ph)
+dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + 
startx_safe, ii_linesize_32,
+ src + (starty_safe - s1y) * 
linesize + (startx_safe - s1x), linesize,
+ src + (starty_safe - s2y) * 
linesize + (startx_safe - s2x), linesize,
+ safe_pw, safe_ph);
 
 // right part of the integral
 compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
   endx_safe, starty_safe,
   src, linesize,
   offx, offy, e, w, h,
-  ii_w - endx_safe, endy_safe - 
starty_safe);
+  ii_w - endx_safe, safe_ph);
 
 // bottom part where only one of s1 and s2 is still readable, or none at 
all
 compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
-- 
2.17.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel