On Tue, Feb 2, 2016 at 11:30 AM, <[email protected]> wrote:
> # HG changeset patch > # User Dnyaneshwar G <[email protected]> > # Date 1454327470 -19800 > # Mon Feb 01 17:21:10 2016 +0530 > # Node ID 894e0fce5d14844d3c85cdb2a287f302fc8cffca > # Parent dc62b47dd0d98f732165345883edac55320baec1 > arm: Implement blockcopy_pp_16x16_neon. Modified include guards with ARM > suffix. > > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/CMakeLists.txt > --- a/source/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/CMakeLists.txt Mon Feb 01 17:21:10 2016 +0530 > @@ -182,9 +182,11 @@ > add_definitions(-march=i686) > endif() > if(ARM AND CROSS_COMPILE_ARM) > - add_definitions(-march=armv6 -mfloat-abi=soft -mfpu=vfp) > + set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp) > + add_definitions(${ARM_ARGS}) > elseif(ARM) > - add_definitions(-march=armv6 -mfloat-abi=hard -mfpu=vfp) > + set(ARM_ARGS -march=armv6 -mfloat-abi=hard -mfpu=vfp) > + add_definitions(${ARM_ARGS}) > endif() > if(FPROFILE_GENERATE) > if(INTEL_CXX) > @@ -418,7 +420,7 @@ > add_subdirectory(encoder) > add_subdirectory(common) > > -if((MSVC_IDE OR XCODE) AND ENABLE_ASSEMBLY) > +if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) > # this is required because of this cmake bug > # http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170 > if(WIN32) > @@ -429,7 +431,17 @@ > > if(ARM OR CROSS_COMPILE_ARM) > # compile ARM arch asm files here > - > + enable_language(ASM) > + foreach(ASM ${ARM_ASMS}) > + set(YASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) > + list(APPEND YASM_SRCS ${YASM_SRC}) > + list(APPEND YASM_OBJS ${ASM}.${SUFFIX}) > + add_custom_command( > + OUTPUT ${ASM}.${SUFFIX} > + COMMAND ${CMAKE_CXX_COMPILER} > + ARGS ${ARM_ARGS} -c ${YASM_SRC} -o ${ASM}.${SUFFIX} > + DEPENDS ${YASM_SRC}) > + endforeach() > Can you please rename all YASM_SRCS/YASM_OBJS as just ASM_SRCS/ASM_OBJS? YASM is the assembler for just Intel architectures and since we're also supporting ARM now, it is prudent to rename. elseif(X86) > # compile X86 arch asm files here > foreach(ASM ${MSVC_ASMS}) > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/CMakeLists.txt > --- a/source/common/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/CMakeLists.txt Mon Feb 01 17:21:10 2016 +0530 > @@ -89,9 +89,10 @@ > set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h > dct8.h loopfilter.h) > > # add ARM assembly/intrinsic files here > - set(A_SRCS) > + set(ARM_SRCS asm.S cpu-a.S mc-a.S) > Continue to call this A_SRCS to denote "assembly sources", so that we are consistent in the naming convention with x86 assembly. > set(VEC_PRIMITIVES) > > + set(ARM_ASMS "${ARM_SRCS}" CACHE INTERNAL "ARM Assembly Sources") > foreach(SRC ${C_SRCS}) > set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) > endforeach() > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/asm-primitives.cpp > --- a/source/common/arm/asm-primitives.cpp Mon Jan 25 14:59:50 2016 > +0530 > +++ b/source/common/arm/asm-primitives.cpp Mon Feb 01 17:21:10 2016 > +0530 > @@ -29,12 +29,18 @@ > #include "x265.h" > #include "cpu.h" > > +extern "C" { > +#include "blockcopy8.h" > +} > > namespace X265_NS { > // private x265 namespace > > void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) > { > - > + if (cpuMask & X265_CPU_NEON) > + { > + p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon); > + } > } > } // namespace X265_NS > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/asm.S > --- a/source/common/arm/asm.S Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/asm.S Mon Feb 01 17:21:10 2016 +0530 > @@ -25,8 +25,6 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#include "x265_config.h" > - > .syntax unified > > #if HAVE_NEON > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/blockcopy8.h > --- a/source/common/arm/blockcopy8.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/blockcopy8.h Mon Feb 01 17:21:10 2016 +0530 > @@ -23,7 +23,9 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_BLOCKCOPY8_H > -#define X265_BLOCKCOPY8_H > +#ifndef X265_BLOCKCOPY8_ARM_H > +#define X265_BLOCKCOPY8_ARM_H > > -#endif // ifndef X265_I386_PIXEL_H > +void x265_blockcopy_pp_16x16_neon(pixel* dst, intptr_t dstStride, const > pixel* src, intptr_t srcStride); > + > +#endif // ifndef X265_I386_PIXEL_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/dct8.h > --- a/source/common/arm/dct8.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/dct8.h Mon Feb 01 17:21:10 2016 +0530 > @@ -22,7 +22,7 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_DCT8_H > -#define X265_DCT8_H > +#ifndef X265_DCT8_ARM_H > +#define X265_DCT8_ARM_H > > -#endif // ifndef X265_DCT8_H > +#endif // ifndef X265_DCT8_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/intrapred.h > --- a/source/common/arm/intrapred.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/intrapred.h Mon Feb 01 17:21:10 2016 +0530 > @@ -25,7 +25,7 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_INTRAPRED_H > -#define X265_INTRAPRED_H > +#ifndef X265_INTRAPRED_ARM_H > +#define X265_INTRAPRED_ARM_H > > -#endif // ifndef X265_INTRAPRED_H > +#endif // ifndef X265_INTRAPRED_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/ipfilter8.h > --- a/source/common/arm/ipfilter8.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/ipfilter8.h Mon Feb 01 17:21:10 2016 +0530 > @@ -22,7 +22,7 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_IPFILTER8_H > -#define X265_IPFILTER8_H > +#ifndef X265_IPFILTER8_ARM_H > +#define X265_IPFILTER8_ARM_H > > -#endif // ifndef X265_IPFILTER8_H > +#endif // ifndef X265_IPFILTER8_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/loopfilter.h > --- a/source/common/arm/loopfilter.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/loopfilter.h Mon Feb 01 17:21:10 2016 +0530 > @@ -23,7 +23,7 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_LOOPFILTER_H > -#define X265_LOOPFILTER_H > +#ifndef X265_LOOPFILTER_ARM_H > +#define X265_LOOPFILTER_ARM_H > > -#endif // ifndef X265_LOOPFILTER_H > +#endif // ifndef X265_LOOPFILTER_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/mc-a.S > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/source/common/arm/mc-a.S Mon Feb 01 17:21:10 2016 +0530 > @@ -0,0 +1,102 @@ > > +/***************************************************************************** > + * Copyright (C) 2016 x265 project > + * > + * Authors: Dnyaneshwar Gorade <[email protected]> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, > USA. > + * > + * This program is also available under a commercial proprietary license. > + * For more information, contact us at license @ x265.com. > + > *****************************************************************************/ > + > +#include "asm.S" > + > +.section .rodata > + > +.align 4 > + > +.text > + > +/* blockcopy_pp_16x16(pixel* dst, intptr_t dstStride, const pixel* src, > intptr_t srcStride) > + * > + * r0 - dst > + * r1 - dstStride > + * r2 - src > + * d3 - srcStride */ > +function x265_blockcopy_pp_16x16_neon > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + add r2, r2, r3 > + add r0, r0, r1 > + vld1.8 {q0}, [r2] > + vst1.8 {q0}, [r0] > + bx lr > +endfunc > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/mc.h > --- a/source/common/arm/mc.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/mc.h Mon Feb 01 17:21:10 2016 +0530 > @@ -21,7 +21,7 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_MC_H > -#define X265_MC_H > +#ifndef X265_MC_ARM_H > +#define X265_MC_ARM_H > > -#endif // ifndef X265_MC_H > +#endif // ifndef X265_MC_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/pixel-util.h > --- a/source/common/arm/pixel-util.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/pixel-util.h Mon Feb 01 17:21:10 2016 +0530 > @@ -22,7 +22,7 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_PIXEL_UTIL_H > -#define X265_PIXEL_UTIL_H > +#ifndef X265_PIXEL_UTIL_ARM_H > +#define X265_PIXEL_UTIL_ARM_H > > -#endif // ifndef X265_PIXEL_UTIL_H > +#endif // ifndef X265_PIXEL_UTIL_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/pixel.h > --- a/source/common/arm/pixel.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/arm/pixel.h Mon Feb 01 17:21:10 2016 +0530 > @@ -27,7 +27,7 @@ > * For more information, contact us at license @ x265.com. > > *****************************************************************************/ > > -#ifndef X265_I386_PIXEL_H > -#define X265_I386_PIXEL_H > +#ifndef X265_I386_PIXEL_ARM_H > +#define X265_I386_PIXEL_ARM_H > > -#endif // ifndef X265_I386_PIXEL_H > +#endif // ifndef X265_I386_PIXEL_ARM_H > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/primitives.cpp > --- a/source/common/primitives.cpp Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/common/primitives.cpp Mon Feb 01 17:21:10 2016 +0530 > @@ -260,7 +260,10 @@ > void PFX(cpu_emms)(void) {} > void PFX(cpu_cpuid)(uint32_t, uint32_t *eax, uint32_t *, uint32_t *, > uint32_t *) { *eax = 0; } > void PFX(cpu_xgetbv)(uint32_t, uint32_t *, uint32_t *) {} > + > +#if ENABLE_ASSEMBLY && X265_ARCH_ARM == 0 > void PFX(cpu_neon_test)(void) {} > int PFX(cpu_fast_neon_mrc_test)(void) { return 0; } > +#endif > } > #endif > diff -r dc62b47dd0d9 -r 894e0fce5d14 source/test/testharness.h > --- a/source/test/testharness.h Mon Jan 25 14:59:50 2016 +0530 > +++ b/source/test/testharness.h Mon Feb 01 17:21:10 2016 +0530 > @@ -80,6 +80,9 @@ > #elif X265_ARCH_ARM > // TOD-DO: verify following inline asm to get cpu Timestamp Counter > for ARM arch > // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a)); > + > + // TO-DO: replace clock() function with appropriate ARM cpu > instructions > + a = clock(); > #endif > return a; > } > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
