--- So here is the test file I was working on with the thoughts I had. --- ; This section is intended to possibly be included in x86inc.asm
; Align all constants to 32 bytes whether they are used in AVX code or not. %assign constant_align 32 ; Value to be used as padding to achieve alignment. Should not be used except ; when a user fails to define a constant as a multiple of 32 bytes. ; The default behavior of nasm/yasm is to pad with NOPs if you don't specify ; what the padding should be. Would a non-zero value be of use to a user trying to ; debug? I don't know. %define constant_align_value db 0 ; Might it be better to not force alignment? Without alignment, instructions ; that require alignment will cause a crash in development rather than just ; producing garbage or incorrect results. Of course that only works if the user ; misaligns a constant they use. Misalignment might not be caught until running ; unrelated code. ; A macro that imitates what x86inc.asm does for function names but without ; needing to deal with registers and stacks. ; - Mangles the name using the private prefix (ff) and also the prefix char (_) ; when that is needed. ; - It sets a global label to the correct name. ; - It aligns the data before finally placing the label. ; - Then the user just enters their constant data. %macro global_constant 1 %xdefine %1 mangle(private_prefix %+ _ %+ %1) global %1 align constant_align,constant_align_value %1: %endmacro ; An alternate but compatible form of the macro which lets the user define the ; constants on the same line. This form would allow easy sorting. And it is ; this I used in the patch. %macro global_constant 1-2+ %xdefine %1 mangle(private_prefix %+ _ %+ %1) global %1 align constant_align,constant_align_value %1: %if %0 == 2 %2 %endif %endmacro SECTION_RODATA 32 global_constant pb_1 ; As this constant is 11 bytes long, 21 bytes with value times 11 db 1 ; 0x0 will be inserted before the next constant. global_constant pb_ff ; This allows two labels to reference the same constant. global_constant pw_m1 times 32 db 0xff global_constant pb_2, times 10 db 2 ; Here is the two argument macro. 
; A sed-like tool could automatically generate the "header" file from the labels ; defined in the constants file. --- libavutil/x86/constants.asm | 92 +++++++++++++++++++++++++++++++++++++++++++++ libavutil/x86/constants.h | 46 +++++++++++++++++++++++ libavutil/x86/constants.inc | 39 +++++++++++++++++++ tests/ref/fate/source | 1 + 4 files changed, 178 insertions(+) create mode 100644 libavutil/x86/constants.asm create mode 100644 libavutil/x86/constants.h create mode 100644 libavutil/x86/constants.inc diff --git a/libavutil/x86/constants.asm b/libavutil/x86/constants.asm new file mode 100644 index 0000000..fbdb1a2 --- /dev/null +++ b/libavutil/x86/constants.asm @@ -0,0 +1,92 @@ +;* MMX/SSE/AVX constants used across x86 dsp optimizations. +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +%include "libavutil/x86/x86util.asm" + +; This section is intended to possibly be included in x86inc.asm + +; Align all constants to 32 bytes whether they are used in AVX code or not. +%assign constant_align 32 + +; Value to be used as padding to achieve alignment. Should not be used except +; when a user fails to define a constant as a multiple of 32 bytes. 
+%define constant_align_value db 0x0 + +; A macro that imitates what x86inc.asm does for function names but without +; needing to deal with registers and stacks. +; - Mangles the name using the private prefix (ff) and also the prefix char (_) +; when that is needed. +; - It sets a global label to the correct name. +; - It aligns the data before finally placing the label. +; - Then the user just enters their constant data. +%macro global_constant 1-2+ + %xdefine %1 mangle(private_prefix %+ _ %+ %1) + global %1 + align constant_align,constant_align_value + %1: + %if %0 == 2 + %2 + %endif +%endmacro + +SECTION_RODATA 32 + +global_constant pb_0, times 32 db 0 +global_constant pb_1, times 32 db 1 +global_constant pb_2, times 32 db 2 +global_constant pb_3, times 32 db 3 +global_constant pb_15, times 32 db 15 +global_constant pb_80, times 32 db 0x80 +global_constant pb_FC, times 32 db 0xFC +global_constant pb_FE, times 32 db 0xFE + +global_constant pw_1, times 16 dw 1 +global_constant pw_2, times 16 dw 2 +global_constant pw_3, times 16 dw 3 +global_constant pw_4, times 16 dw 4 +global_constant pw_5, times 16 dw 5 +global_constant pw_8, times 16 dw 8 +global_constant pw_9, times 16 dw 9 +global_constant pw_10, times 16 dw 10 +global_constant pw_15, times 16 dw 15 +global_constant pw_16, times 16 dw 16 +global_constant pw_17, times 16 dw 17 +global_constant pw_18, times 16 dw 18 +global_constant pw_20, times 16 dw 20 +global_constant pw_32, times 16 dw 32 +global_constant pw_42, times 16 dw 42 +global_constant pw_53, times 16 dw 53 +global_constant pw_64, times 16 dw 64 +global_constant pw_96, times 16 dw 96 +global_constant pw_128, times 16 dw 128 +global_constant pw_255, times 16 dw 255 +global_constant pw_256, times 16 dw 256 +global_constant pw_512, times 16 dw 512 +global_constant pw_1019, times 16 dw 1019 +global_constant pw_1023, times 16 dw 1023 +global_constant pw_1024, times 16 dw 1024 +global_constant pw_2048, times 16 dw 2048 +global_constant pw_4096, times 16 
dw 4096 +global_constant pw_8192, times 16 dw 8192 + +; TODO: perhaps change name to pb_ff +global_constant pw_m1, times 32 db 0xff + +global_constant pd_1, times 8 dd 1 + +global_constant ps_neg, times 8 dd 0x80000000 diff --git a/libavutil/x86/constants.h b/libavutil/x86/constants.h new file mode 100644 index 0000000..bab16d3 --- /dev/null +++ b/libavutil/x86/constants.h @@ -0,0 +1,46 @@ +#ifndef AVUTIL_X86_CONSTANTS_H +#define AVUTIL_X86_CONSTANTS_H + +#include "libavutil/x86/asm.h" + +extern const ymm_reg ff_pb_0; +extern const ymm_reg ff_pb_1; +extern const ymm_reg ff_pb_2; +extern const ymm_reg ff_pb_3; +extern const ymm_reg ff_pb_15; +extern const ymm_reg ff_pb_80; +extern const ymm_reg ff_pb_FC; +extern const ymm_reg ff_pb_FE; +extern const ymm_reg ff_pw_1; +extern const ymm_reg ff_pw_2; +extern const ymm_reg ff_pw_3; +extern const ymm_reg ff_pw_4; +extern const ymm_reg ff_pw_5; +extern const ymm_reg ff_pw_8; +extern const ymm_reg ff_pw_9; +extern const ymm_reg ff_pw_10; +extern const ymm_reg ff_pw_15; +extern const ymm_reg ff_pw_16; +extern const ymm_reg ff_pw_17; +extern const ymm_reg ff_pw_18; +extern const ymm_reg ff_pw_20; +extern const ymm_reg ff_pw_32; +extern const ymm_reg ff_pw_42; +extern const ymm_reg ff_pw_53; +extern const ymm_reg ff_pw_64; +extern const ymm_reg ff_pw_96; +extern const ymm_reg ff_pw_128; +extern const ymm_reg ff_pw_255; +extern const ymm_reg ff_pw_256; +extern const ymm_reg ff_pw_512; +extern const ymm_reg ff_pw_1019; +extern const ymm_reg ff_pw_1023; +extern const ymm_reg ff_pw_1024; +extern const ymm_reg ff_pw_2048; +extern const ymm_reg ff_pw_4096; +extern const ymm_reg ff_pw_8192; +extern const ymm_reg ff_pw_m1; +extern const ymm_reg ff_pd_1; +extern const ymm_reg ff_ps_neg; + +#endif /* AVUTIL_X86_CONSTANTS_H */ diff --git a/libavutil/x86/constants.inc b/libavutil/x86/constants.inc new file mode 100644 index 0000000..cb03fd9 --- /dev/null +++ b/libavutil/x86/constants.inc @@ -0,0 +1,39 @@ +cextern pb_0 +cextern pb_1 
+cextern pb_2 +cextern pb_3 +cextern pb_15 +cextern pb_80 +cextern pb_FC +cextern pb_FE +cextern pw_1 +cextern pw_2 +cextern pw_3 +cextern pw_4 +cextern pw_5 +cextern pw_8 +cextern pw_9 +cextern pw_10 +cextern pw_15 +cextern pw_16 +cextern pw_17 +cextern pw_18 +cextern pw_20 +cextern pw_32 +cextern pw_42 +cextern pw_53 +cextern pw_64 +cextern pw_96 +cextern pw_128 +cextern pw_255 +cextern pw_256 +cextern pw_512 +cextern pw_1019 +cextern pw_1023 +cextern pw_1024 +cextern pw_2048 +cextern pw_4096 +cextern pw_8192 +cextern pw_m1 +cextern pd_1 +cextern ps_neg diff --git a/tests/ref/fate/source b/tests/ref/fate/source index 9cd8b30..8a8157a 100644 --- a/tests/ref/fate/source +++ b/tests/ref/fate/source @@ -11,6 +11,7 @@ libavfilter/log2_tab.c libavformat/file_open.c libavformat/golomb_tab.c libavformat/log2_tab.c +libavutil/x86/constants.h libavutil/x86_cpu.h libswresample/log2_tab.c libswscale/log2_tab.c -- 2.5.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel