From: Clément Bœsch <clem...@stupeflix.com> --- I still need to add a "& 3" (mod 4) version now; I don't know whether it can be made any smaller. --- libswscale/aarch64/hscale.S | 2 +- libswscale/aarch64/swscale.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/libswscale/aarch64/hscale.S b/libswscale/aarch64/hscale.S index c32394c..e6bd365 100644 --- a/libswscale/aarch64/hscale.S +++ b/libswscale/aarch64/hscale.S @@ -20,7 +20,7 @@ #include "libavutil/aarch64/asm.S" -function ff_hscale_8_to_15_neon, export=1 +function ff_hscale_8_to_15_X8_neon, export=1 add x10, x4, w6, UXTW #1 // filter2 = filter + filterSize*2 (x2 because int16) 1: ldr w8, [x5], #4 // filterPos[0] ldr w9, [x5], #4 // filterPos[1] diff --git a/libswscale/aarch64/swscale.c b/libswscale/aarch64/swscale.c index ebf76a5..f38effe 100644 --- a/libswscale/aarch64/swscale.c +++ b/libswscale/aarch64/swscale.c @@ -21,7 +21,7 @@ #include "libswscale/swscale_internal.h" #include "libavutil/aarch64/cpu.h" -void ff_hscale_8_to_15_neon(SwsContext *c, int16_t *dst, int dstW, +void ff_hscale_8_to_15_X8_neon(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); @@ -31,7 +31,8 @@ av_cold void ff_sws_init_swscale_aarch64(SwsContext *c) if (have_neon(cpu_flags)) { if (c->srcBpc == 8 && c->dstBpc <= 14) { - //c->hyScale = c->hcScale = ff_hscale_8_to_15_neon; + if (c->hLumFilterSize & 7) c->hyScale = ff_hscale_8_to_15_X8_neon; + if (c->hChrFilterSize & 7) c->hcScale = ff_hscale_8_to_15_X8_neon; } } } -- 2.7.4 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel