On 2017-01-15 22:55:51 +0200, Martin Storsjö wrote: > --- > libavcodec/aarch64/vp9lpf_neon.S | 16 +++++++++++++--- > 1 file changed, 13 insertions(+), 3 deletions(-) > > diff --git a/libavcodec/aarch64/vp9lpf_neon.S > b/libavcodec/aarch64/vp9lpf_neon.S > index 4553173..3894307 100644 > --- a/libavcodec/aarch64/vp9lpf_neon.S > +++ b/libavcodec/aarch64/vp9lpf_neon.S > @@ -316,20 +316,30 @@ > > uxtl_sz v0.8h, v1.8h, v22, \sz // p1 > uxtl_sz v2.8h, v3.8h, v25, \sz // q1 > +.if \wd >= 8 > + mov x5, v6.d[0] > +.endif > saddw_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp3, \sz // p1 + f > ssubw_sz v2.8h, v3.8h, v2.8h, v3.8h, \tmp3, \sz // q1 - f > +.if \wd >= 8 > +.ifc \sz, .16b > + mov x6, v6.d[1] > +.endif > +.endif
Is it helpful to have this mov here? It would look a little less ugly if you merged this .if with the one above. > sqxtun_sz v0, v0.8h, v1.8h, \sz // out p1 > sqxtun_sz v2, v2.8h, v3.8h, \sz // out q1 > +.if \wd >= 8 > +.ifc \sz, .16b > + adds x5, x5, x6 > +.endif > +.endif > bit v22\sz, v0\sz, v5\sz // if (!hev && fm && > !flat8in) > bit v25\sz, v2\sz, v5\sz > > // If no pixels need flat8in, jump to flat8out > // (or to a writeout of the inner 4 pixels, for wd=8) > .if \wd >= 8 > - mov x5, v6.d[0] > .ifc \sz, .16b > - mov x6, v6.d[1] > - adds x5, x5, x6 > b.eq 6f > .else > cbz x5, 6f otherwise ok Janne _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
