# HG changeset patch # User Dnyaneshwar G <dnyanesh...@multicorewareinc.com> # Date 1454410744 -19800 # Tue Feb 02 16:29:04 2016 +0530 # Node ID 5463e2b9f37e4952bb16e94673c6fd2991243145 # Parent dc62b47dd0d98f732165345883edac55320baec1 arm: Implement blockcopy_pp_16x16_neon. Modified include guards with ARM suffix.
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/CMakeLists.txt --- a/source/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530 +++ b/source/CMakeLists.txt Tue Feb 02 16:29:04 2016 +0530 @@ -182,9 +182,11 @@ add_definitions(-march=i686) endif() if(ARM AND CROSS_COMPILE_ARM) - add_definitions(-march=armv6 -mfloat-abi=soft -mfpu=vfp) + set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp) + add_definitions(${ARM_ARGS}) elseif(ARM) - add_definitions(-march=armv6 -mfloat-abi=hard -mfpu=vfp) + set(ARM_ARGS -march=armv6 -mfloat-abi=hard -mfpu=vfp) + add_definitions(${ARM_ARGS}) endif() if(FPROFILE_GENERATE) if(INTEL_CXX) @@ -418,7 +420,7 @@ add_subdirectory(encoder) add_subdirectory(common) -if((MSVC_IDE OR XCODE) AND ENABLE_ASSEMBLY) +if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY) # this is required because of this cmake bug # http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170 if(WIN32) @@ -429,23 +431,33 @@ if(ARM OR CROSS_COMPILE_ARM) # compile ARM arch asm files here - + enable_language(ASM) + foreach(ASM ${ARM_ASMS}) + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + list(APPEND ASM_SRCS ${ASM_SRC}) + list(APPEND ASM_OBJS ${ASM}.${SUFFIX}) + add_custom_command( + OUTPUT ${ASM}.${SUFFIX} + COMMAND ${CMAKE_CXX_COMPILER} + ARGS ${ARM_ARGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX} + DEPENDS ${ASM_SRC}) + endforeach() elseif(X86) # compile X86 arch asm files here foreach(ASM ${MSVC_ASMS}) - set(YASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/x86/${ASM}) - list(APPEND YASM_SRCS ${YASM_SRC}) - list(APPEND YASM_OBJS ${ASM}.${SUFFIX}) + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/x86/${ASM}) + list(APPEND ASM_SRCS ${ASM_SRC}) + list(APPEND ASM_OBJS ${ASM}.${SUFFIX}) add_custom_command( OUTPUT ${ASM}.${SUFFIX} - COMMAND ${YASM_EXECUTABLE} ARGS ${YASM_FLAGS} ${YASM_SRC} -o ${ASM}.${SUFFIX} - DEPENDS ${YASM_SRC}) + COMMAND ${YASM_EXECUTABLE} ARGS ${YASM_FLAGS} ${ASM_SRC} -o ${ASM}.${SUFFIX} + DEPENDS ${ASM_SRC}) endforeach() endif() endif() -source_group(ASM FILES 
${YASM_SRCS}) -add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS}) +source_group(ASM FILES ${ASM_SRCS}) +add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS}) if(NOT MSVC) set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265) endif() @@ -479,7 +491,7 @@ option(ENABLE_SHARED "Build shared library" ON) if(ENABLE_SHARED) - add_library(x265-shared SHARED "${PROJECT_BINARY_DIR}/x265.def" ${YASM_OBJS} + add_library(x265-shared SHARED "${PROJECT_BINARY_DIR}/x265.def" ${ASM_OBJS} ${X265_RC_FILE} $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common>) if(EXTRA_LIB) target_link_libraries(x265-shared ${EXTRA_LIB}) @@ -575,7 +587,7 @@ # Xcode seems unable to link the CLI with libs, so link as one targget add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp - $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS}) + $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS}) else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE} ${ExportDefs} x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp) diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/CMakeLists.txt --- a/source/common/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/CMakeLists.txt Tue Feb 02 16:29:04 2016 +0530 @@ -89,9 +89,10 @@ set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) # add ARM assembly/intrinsic files here - set(A_SRCS) + set(A_SRCS asm.S cpu-a.S mc-a.S) set(VEC_PRIMITIVES) + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") foreach(SRC ${C_SRCS}) set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) endforeach() diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/asm-primitives.cpp --- a/source/common/arm/asm-primitives.cpp Mon Jan 25 14:59:50 2016 +0530 +++ 
b/source/common/arm/asm-primitives.cpp Tue Feb 02 16:29:04 2016 +0530 @@ -29,12 +29,18 @@ #include "x265.h" #include "cpu.h" +extern "C" { +#include "blockcopy8.h" +} namespace X265_NS { // private x265 namespace void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) { - + if (cpuMask & X265_CPU_NEON) + { + p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon); + } } } // namespace X265_NS diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/asm.S --- a/source/common/arm/asm.S Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/asm.S Tue Feb 02 16:29:04 2016 +0530 @@ -25,8 +25,6 @@ * For more information, contact us at license @ x265.com. *****************************************************************************/ -#include "x265_config.h" - .syntax unified #if HAVE_NEON diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/blockcopy8.h --- a/source/common/arm/blockcopy8.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/blockcopy8.h Tue Feb 02 16:29:04 2016 +0530 @@ -23,7 +23,9 @@ * For more information, contact us at license @ x265.com. *****************************************************************************/ -#ifndef X265_BLOCKCOPY8_H -#define X265_BLOCKCOPY8_H +#ifndef X265_BLOCKCOPY8_ARM_H +#define X265_BLOCKCOPY8_ARM_H -#endif // ifndef X265_I386_PIXEL_H +void x265_blockcopy_pp_16x16_neon(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride); + +#endif // ifndef X265_BLOCKCOPY8_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/dct8.h --- a/source/common/arm/dct8.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/dct8.h Tue Feb 02 16:29:04 2016 +0530 @@ -22,7 +22,7 @@ * For more information, contact us at license @ x265.com. 
*****************************************************************************/ -#ifndef X265_DCT8_H -#define X265_DCT8_H +#ifndef X265_DCT8_ARM_H +#define X265_DCT8_ARM_H -#endif // ifndef X265_DCT8_H +#endif // ifndef X265_DCT8_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/intrapred.h --- a/source/common/arm/intrapred.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/intrapred.h Tue Feb 02 16:29:04 2016 +0530 @@ -25,7 +25,7 @@ * For more information, contact us at license @ x265.com. *****************************************************************************/ -#ifndef X265_INTRAPRED_H -#define X265_INTRAPRED_H +#ifndef X265_INTRAPRED_ARM_H +#define X265_INTRAPRED_ARM_H -#endif // ifndef X265_INTRAPRED_H +#endif // ifndef X265_INTRAPRED_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/ipfilter8.h --- a/source/common/arm/ipfilter8.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/ipfilter8.h Tue Feb 02 16:29:04 2016 +0530 @@ -22,7 +22,7 @@ * For more information, contact us at license @ x265.com. *****************************************************************************/ -#ifndef X265_IPFILTER8_H -#define X265_IPFILTER8_H +#ifndef X265_IPFILTER8_ARM_H +#define X265_IPFILTER8_ARM_H -#endif // ifndef X265_IPFILTER8_H +#endif // ifndef X265_IPFILTER8_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/loopfilter.h --- a/source/common/arm/loopfilter.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/loopfilter.h Tue Feb 02 16:29:04 2016 +0530 @@ -23,7 +23,7 @@ * For more information, contact us at license @ x265.com. 
*****************************************************************************/ -#ifndef X265_LOOPFILTER_H -#define X265_LOOPFILTER_H +#ifndef X265_LOOPFILTER_ARM_H +#define X265_LOOPFILTER_ARM_H -#endif // ifndef X265_LOOPFILTER_H +#endif // ifndef X265_LOOPFILTER_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/mc-a.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/common/arm/mc-a.S Tue Feb 02 16:29:04 2016 +0530 @@ -0,0 +1,102 @@ +/***************************************************************************** + * Copyright (C) 2016 x265 project + * + * Authors: Dnyaneshwar Gorade <dnyanesh...@multicorewareinc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ +#include "asm.S" + +.section .rodata + +.align 4 + +.text + +/* blockcopy_pp_16x16(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride) + * + * r0 - dst + * r1 - dstStride + * r2 - src + * r3 - srcStride */ +function x265_blockcopy_pp_16x16_neon + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + add r2, r2, r3 + add r0, r0, r1 + vld1.8 {q0}, [r2] + vst1.8 {q0}, [r0] + bx lr +endfunc diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/mc.h --- a/source/common/arm/mc.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/mc.h Tue Feb 02 16:29:04 2016 +0530 @@ -21,7 +21,7 @@ * For more information, contact us at license @ x265.com. 
*****************************************************************************/ -#ifndef X265_MC_H -#define X265_MC_H +#ifndef X265_MC_ARM_H +#define X265_MC_ARM_H -#endif // ifndef X265_MC_H +#endif // ifndef X265_MC_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/pixel-util.h --- a/source/common/arm/pixel-util.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/pixel-util.h Tue Feb 02 16:29:04 2016 +0530 @@ -22,7 +22,7 @@ * For more information, contact us at license @ x265.com. *****************************************************************************/ -#ifndef X265_PIXEL_UTIL_H -#define X265_PIXEL_UTIL_H +#ifndef X265_PIXEL_UTIL_ARM_H +#define X265_PIXEL_UTIL_ARM_H -#endif // ifndef X265_PIXEL_UTIL_H +#endif // ifndef X265_PIXEL_UTIL_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/pixel.h --- a/source/common/arm/pixel.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/arm/pixel.h Tue Feb 02 16:29:04 2016 +0530 @@ -27,7 +27,7 @@ * For more information, contact us at license @ x265.com. 
*****************************************************************************/ -#ifndef X265_I386_PIXEL_H -#define X265_I386_PIXEL_H +#ifndef X265_I386_PIXEL_ARM_H +#define X265_I386_PIXEL_ARM_H -#endif // ifndef X265_I386_PIXEL_H +#endif // ifndef X265_I386_PIXEL_ARM_H diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/primitives.cpp --- a/source/common/primitives.cpp Mon Jan 25 14:59:50 2016 +0530 +++ b/source/common/primitives.cpp Tue Feb 02 16:29:04 2016 +0530 @@ -260,7 +260,10 @@ void PFX(cpu_emms)(void) {} void PFX(cpu_cpuid)(uint32_t, uint32_t *eax, uint32_t *, uint32_t *, uint32_t *) { *eax = 0; } void PFX(cpu_xgetbv)(uint32_t, uint32_t *, uint32_t *) {} + +#if ENABLE_ASSEMBLY && X265_ARCH_ARM == 0 void PFX(cpu_neon_test)(void) {} int PFX(cpu_fast_neon_mrc_test)(void) { return 0; } +#endif } #endif diff -r dc62b47dd0d9 -r 5463e2b9f37e source/test/testharness.h --- a/source/test/testharness.h Mon Jan 25 14:59:50 2016 +0530 +++ b/source/test/testharness.h Tue Feb 02 16:29:04 2016 +0530 @@ -80,6 +80,9 @@ #elif X265_ARCH_ARM // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a)); + + // TO-DO: replace clock() function with appropriate ARM cpu instructions + a = clock(); #endif return a; } _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel