At 2016-03-02 15:05:51,[email protected] wrote: ># HG changeset patch ># User Radhakrishnan VR <[email protected]> ># Date 1456814898 -19800 ># Tue Mar 01 12:18:18 2016 +0530 ># Node ID af64d5b645ff4afa119d43ef4ac1c79dd86357f6 ># Parent 291beccb67606494a9a144ca2cc4411ab3e21e50 >arm: Implement blockfill_s_neon ARM NEON > >diff -r 291beccb6760 -r af64d5b645ff source/common/arm/asm-primitives.cpp >--- a/source/common/arm/asm-primitives.cpp Fri Feb 26 16:23:56 2016 +0530 >+++ b/source/common/arm/asm-primitives.cpp Tue Mar 01 12:18:18 2016 +0530 >@@ -42,6 +42,12 @@ > { > if (cpuMask & X265_CPU_NEON) > { >+ // Block_fill >+ p.cu[BLOCK_4x4].blockfill_s = PFX(blockfill_s_4x4_neon); >+ p.cu[BLOCK_8x8].blockfill_s = PFX(blockfill_s_8x8_neon); >+ p.cu[BLOCK_16x16].blockfill_s = PFX(blockfill_s_16x16_neon); >+ p.cu[BLOCK_32x32].blockfill_s = PFX(blockfill_s_32x32_neon); >+ > // Blockcopy_ss > p.cu[BLOCK_4x4].copy_ss = PFX(blockcopy_ss_4x4_neon); > p.cu[BLOCK_8x8].copy_ss = PFX(blockcopy_ss_8x8_neon); >diff -r 291beccb6760 -r af64d5b645ff source/common/arm/blockcopy8.S >--- a/source/common/arm/blockcopy8.S Fri Feb 26 16:23:56 2016 +0530 >+++ b/source/common/arm/blockcopy8.S Tue Mar 01 12:18:18 2016 +0530 >@@ -311,3 +311,44 @@ > bne loop_css64 > bx lr > endfunc >+ >+// void x265_blockfill_s_neon(int16_t* dst, intptr_t dstride, int16_t val) >+function x265_blockfill_s_4x4_neon >+ vdup.u16 d0, r2 >+ lsl r1, #1 >+.rept 4 >+ vst1.16 {d0}, [r0], r1 >+.endr >+ bx lr >+endfunc >+ >+function x265_blockfill_s_8x8_neon >+ vdup.u16 q0, r2 >+ lsl r1, #1 >+.rept 8 >+ vst1.16 {q0}, [r0], r1 >+.endr >+ bx lr >+endfunc >+ >+function x265_blockfill_s_16x16_neon >+ vdup.u16 q0, r2 >+ vmov.u16 q1, q0
This is a register-to-register copy, so the data size postfix (.u16) is unnecessary; plain "vmov q1, q0" is sufficient.
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
