[libav-devel] [PATCH 1/3] x86: add missing XOP checks and macros
Signed-off-by: James Almer jamr...@gmail.com --- configure | 5 + libavutil/x86/cpu.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/configure b/configure index 72cf831..96cbe5d 100755 --- a/configure +++ b/configure @@ -270,6 +270,7 @@ Optimization options (experts only): --disable-sse4 disable SSE4 optimizations --disable-sse42 disable SSE4.2 optimizations --disable-avxdisable AVX optimizations + --disable-xopdisable XOP optimizations --disable-fma4 disable FMA4 optimizations --disable-avx2 disable AVX2 optimizations --disable-armv5tedisable armv5te optimizations @@ -1252,6 +1253,7 @@ ARCH_EXT_LIST_X86=' avx avx2 cpunop +xop fma4 i686 mmx @@ -1575,6 +1577,7 @@ ssse3_deps=sse3 sse4_deps=ssse3 sse42_deps=sse4 avx_deps=sse42 +xop_deps=avx fma4_deps=avx avx2_deps=avx @@ -3757,6 +3760,7 @@ EOF check_yasm movbe ecx, [5] enable yasm || die yasm/nasm not found or too old. Use --disable-yasm for a crippled build. +check_yasm vpmacsdd xmm0, xmm1, xmm2, xmm3 || disable xop_external check_yasm vfmaddps ymm0, ymm1, ymm2, ymm3 || disable fma4_external check_yasm CPU amdnop enable cpunop fi @@ -4289,6 +4293,7 @@ if enabled x86; then echo SSE enabled ${sse-no} echo SSSE3 enabled ${ssse3-no} echo AVX enabled ${avx-no} +echo XOP enabled ${xop-no} echo FMA4 enabled ${fma4-no} echo i686 features enabled ${i686-no} echo CMOV is fast ${fast_cmov-no} diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index 5303c5a..40daf44 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -37,6 +37,7 @@ #define X86_SSE4(flags) CPUEXT(flags, SSE4) #define X86_SSE42(flags)CPUEXT(flags, SSE42) #define X86_AVX(flags) CPUEXT(flags, AVX) +#define X86_XOP(flags) CPUEXT(flags, XOP) #define X86_FMA4(flags) CPUEXT(flags, FMA4) #define X86_AVX2(flags) CPUEXT(flags, AVX2) @@ -51,6 +52,7 @@ #define EXTERNAL_SSE4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4) #define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42) #define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX) 
+#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP) #define EXTERNAL_FMA4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4) #define EXTERNAL_AVX2(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2) @@ -65,6 +67,7 @@ #define INLINE_SSE4(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE4) #define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42) #define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX) +#define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP) #define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4) #define INLINE_AVX2(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX2) -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 3/3] x86: add detection for Bit Manipulation Instruction sets
Based on x264 code Signed-off-by: James Almer jamr...@gmail.com --- libavutil/cpu.c | 6 ++ libavutil/cpu.h | 2 ++ libavutil/x86/cpu.c | 16 +++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 972e4eb..d651eb2 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -90,6 +90,8 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX) +#define CPUFLAG_BMI1 (AV_CPU_FLAG_BMI1) +#define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | CPUFLAG_BMI1) static const AVOption cpuflags_opts[] = { { flags , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = flags }, #if ARCH_PPC @@ -111,6 +113,8 @@ int av_parse_cpu_flags(const char *s) { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 },.unit = flags }, { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 },.unit = flags }, { avx2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2 },.unit = flags }, +{ bmi1, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI1 },.unit = flags }, +{ bmi2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI2 },.unit = flags }, { 3dnow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW },.unit = flags }, { 3dnowext, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT },.unit = flags }, { cmov, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV },.unit = flags }, @@ -212,6 +216,8 @@ static const struct { { AV_CPU_FLAG_3DNOWEXT, 3dnowext }, { AV_CPU_FLAG_CMOV, cmov }, { AV_CPU_FLAG_AVX2, avx2 }, +{ AV_CPU_FLAG_BMI1, bmi1 }, +{ AV_CPU_FLAG_BMI2, bmi2 }, #endif { 0 } }; diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 934b3be..517c520 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -50,6 +50,8 @@ #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov #define AV_CPU_FLAG_AVX2 0x8000 /// AVX2 functions: requires OS support even if YMM registers aren't used #define AV_CPU_FLAG_FMA30x1 /// 
Haswell FMA3 functions +#define AV_CPU_FLAG_BMI10x2 /// Bit Manipulation Instruction Set 1 +#define AV_CPU_FLAG_BMI20x4 /// Bit Manipulation Instruction Set 2 #define AV_CPU_FLAG_ALTIVEC 0x0001 /// standard diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index bf5e9fc..4c96c27 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -137,16 +137,22 @@ int ff_get_cpu_flags_x86(void) rval |= AV_CPU_FLAG_FMA3; } } -#if HAVE_AVX2 +#endif /* HAVE_AVX */ +#endif /* HAVE_SSE */ if (max_std_level = 7) { cpuid(7, eax, ebx, ecx, edx); +#if HAVE_AVX2 if (ebx0x0020) rval |= AV_CPU_FLAG_AVX2; -/* TODO: BMI1/2 */ -} #endif /* HAVE_AVX2 */ -#endif /* HAVE_AVX */ -#endif /* HAVE_SSE */ +/* BMI1/2 don't need OS support */ +if (ebx0x0008) +{ +rval |= AV_CPU_FLAG_BMI1; +if (ebx0x0100) +rval |= AV_CPU_FLAG_BMI2; +} +} } cpuid(0x8000, max_ext_level, ebx, ecx, edx); -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/3] x86: add detection for FMA3 instruction set
Based on x264 code Signed-off-by: James Almer jamr...@gmail.com --- configure | 5 + libavutil/cpu.c | 3 +++ libavutil/cpu.h | 1 + libavutil/x86/cpu.c | 5 - libavutil/x86/cpu.h | 3 +++ 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 96cbe5d..82077b5 100755 --- a/configure +++ b/configure @@ -271,6 +271,7 @@ Optimization options (experts only): --disable-sse42 disable SSE4.2 optimizations --disable-avxdisable AVX optimizations --disable-xopdisable XOP optimizations + --disable-fma3 disable FMA3 optimizations --disable-fma4 disable FMA4 optimizations --disable-avx2 disable AVX2 optimizations --disable-armv5tedisable armv5te optimizations @@ -1254,6 +1255,7 @@ ARCH_EXT_LIST_X86=' avx2 cpunop xop +fma3 fma4 i686 mmx @@ -1578,6 +1580,7 @@ sse4_deps=ssse3 sse42_deps=sse4 avx_deps=sse42 xop_deps=avx +fma3_deps=avx fma4_deps=avx avx2_deps=avx @@ -3761,6 +3764,7 @@ EOF check_yasm movbe ecx, [5] enable yasm || die yasm/nasm not found or too old. Use --disable-yasm for a crippled build. 
check_yasm vpmacsdd xmm0, xmm1, xmm2, xmm3 || disable xop_external +check_yasm vfmadd132ps ymm0, ymm1, ymm2|| disable fma3_external check_yasm vfmaddps ymm0, ymm1, ymm2, ymm3 || disable fma4_external check_yasm CPU amdnop enable cpunop fi @@ -4294,6 +4298,7 @@ if enabled x86; then echo SSSE3 enabled ${ssse3-no} echo AVX enabled ${avx-no} echo XOP enabled ${xop-no} +echo FMA3 enabled ${fma3-no} echo FMA4 enabled ${fma4-no} echo i686 features enabled ${i686-no} echo CMOV is fast ${fast_cmov-no} diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 8c2cfb8..972e4eb 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -87,6 +87,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) +#define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX) static const AVOption cpuflags_opts[] = { @@ -107,6 +108,7 @@ int av_parse_cpu_flags(const char *s) { sse4.2 , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 },.unit = flags }, { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX },.unit = flags }, { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP },.unit = flags }, +{ fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 },.unit = flags }, { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 },.unit = flags }, { avx2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2 },.unit = flags }, { 3dnow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW },.unit = flags }, @@ -204,6 +206,7 @@ static const struct { { AV_CPU_FLAG_SSE42, sse4.2 }, { AV_CPU_FLAG_AVX, avx}, { AV_CPU_FLAG_XOP, xop}, +{ AV_CPU_FLAG_FMA3, fma3 }, { AV_CPU_FLAG_FMA4, fma4 }, { AV_CPU_FLAG_3DNOW, 3dnow }, { AV_CPU_FLAG_3DNOWEXT, 3dnowext }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 29036e3..934b3be 100644 --- a/libavutil/cpu.h +++ 
b/libavutil/cpu.h @@ -49,6 +49,7 @@ #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov #define AV_CPU_FLAG_AVX2 0x8000 /// AVX2 functions: requires OS support even if YMM registers aren't used +#define AV_CPU_FLAG_FMA30x1 /// Haswell FMA3 functions #define AV_CPU_FLAG_ALTIVEC 0x0001 /// standard diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 0e06d5d..bf5e9fc 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -131,8 +131,11 @@ int ff_get_cpu_flags_x86(void) if ((ecx 0x1800) == 0x1800) { /* Check for OS support */ xgetbv(0, eax, edx); -if ((eax 0x6) == 0x6) +if ((eax 0x6) == 0x6) { rval |= AV_CPU_FLAG_AVX; +if (ecx0x1000) +rval |= AV_CPU_FLAG_FMA3; +} } #if HAVE_AVX2 if (max_std_level = 7) { diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index 40daf44..50da30e 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -38,6 +38,7 @@ #define X86_SSE42(flags)CPUEXT(flags, SSE42) #define X86_AVX(flags) CPUEXT(flags, AVX) #define X86_XOP(flags
[libav-devel] [PATCH 0/3] support for FMA3 and BMI instruction sets
These are the missing instruction sets introduced with Haswell/Piledriver CPUs. Last two patches are based on x264 detection code. James Almer (3): x86: add missing XOP checks and macros x86: add detection for FMA3 instruction set x86: add detection for Bit Manipulation Instruction sets configure | 10 ++ libavutil/cpu.c | 9 + libavutil/cpu.h | 3 +++ libavutil/x86/cpu.c | 21 +++-- libavutil/x86/cpu.h | 6 ++ 5 files changed, 43 insertions(+), 6 deletions(-) -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 0/3] support for FMA3 and BMI instruction sets
On 22/02/14 1:29 PM, Luca Barbato wrote: On 22/02/14 06:53, James Almer wrote: These are the missing instruction sets introduced with Haswell/Piledriver CPUs. Last two patches are based on x264 detection code. The set doesn't look bad at all but I'm wondering about its usage: James Almer (3): x86: add missing XOP checks and macros x86: add detection for FMA3 instruction set What is the relationship between FMA3 and FMA4? It's what happens when Intel and AMD don't talk to each other to coordinate stuff. Short story summary here: https://en.wikipedia.org/wiki/FMA_instruction_set Both sets do the same in essence. The only difference is that one uses three operands while the other uses four (technically, FMA4 is the most flexible, but it's only supported by AMD). AMD added FMA4 starting with Bulldozer, then added FMA3 as well to Piledriver for compatibility reasons once Intel revealed they would use that starting with Haswell. x86: add detection for Bit Manipulation Instruction sets Is BMI a subset of AVX2? No, they are independent and work on general-purpose registers. AMD added BMI1 starting with Piledriver, which doesn't support AVX2, whereas Intel added both BMI1 and BMI2 starting with Haswell. lu ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 4/5] configure: Move cpunop into ARCH_EXT_LIST_X86
On 22/02/14 11:57 PM, Luca Barbato wrote: On 23/02/14 00:52, Dave Yeo wrote: HAVE_LIST has ARCH_EXT_LIST ARCH_EXT_LIST has ARCH_EXT_LIST_X86 I'm wondering why it is broken for you since it should not. lu https://fate.libav.org/x86.os2.444/2014002516 Probably related. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/3] x86: add detection for Bit Manipulation Instruction sets
On 23/02/14 11:20 AM, Janne Grunau wrote: Do you plan to write assembly using any of these instructions? Having the tests while not using the instructions is just an exercise in completeness. Janne No, not for BMI1/2. I saw the TODO line as I was adding FMA3 so I thought I might as well get that out of the way while at it. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/3] x86/synth_filter: add synth_filter_sse
Build only on x86_32 targets. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 55 +--- libavcodec/x86/dcadsp_init.c | 44 +-- 2 files changed, 69 insertions(+), 30 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 56039ba..970ec3d 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -199,15 +199,31 @@ INIT_XMM sse DCA_LFE_FIR 0 DCA_LFE_FIR 1 -INIT_XMM sse2 +%macro SETZERO 1 +%if cpuflag(sse2) +pxor %1, %1 +%else +xorps %1, %1, %1 +%endif +%endmacro + +%macro SHUF 2 +%if cpuflag(sse2) +pshufd%1, %2, q0123 +%else +mova %1, %2 +shufps%1, %1, q0123 +%endif +%endmacro + %macro INNER_LOOP 1 ; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i ;~ a += window[i + j] * (-synth_buf[15 - i + j]) ;~ b += window[i + j + 16] * (synth_buf[i + j]) -pshufdm5, [ptr2 + j + (15 - 3) * 4], q0123 +SHUF m5, [ptr2 + j + (15 - 3) * 4] mova m6, [ptr1 + j] %if ARCH_X86_64 -pshufd m11, [ptr2 + j + (15 - 3) * 4 - mmsize], q0123 +SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize] mova m12, [ptr1 + j + mmsize] %endif mulps m6, [win + %1 + j + 16 * 4] @@ -224,10 +240,10 @@ INIT_XMM sse2 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) ;~ d += window[i + j + 48] * (synth_buf[31 - i + j]) -pshufdm6, [ptr2 + j + (31 - 3) * 4], q0123 +SHUF m6, [ptr2 + j + (31 - 3) * 4] mova m5, [ptr1 + j + 16 * 4] %if ARCH_X86_64 -pshufd m12, [ptr2 + j + (31 - 3) * 4 - mmsize], q0123 +SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize] mova m11, [ptr1 + j + mmsize + 16 * 4] %endif mulps m5, [win + %1 + j + 32 * 4] @@ -245,20 +261,25 @@ INIT_XMM sse2 subj, 64 * 4 %endmacro -; void ff_synth_filter_inner_sse2(float *synth_buf, float synth_buf2[32], -; const float window[512], float out[32], -; intptr_t offset, float scale) +; void ff_synth_filter_inner_opt(float *synth_buf, float synth_buf2[32], +; const float window[512], float out[32], +; intptr_t offset, float scale) +%macro SYNTH_FILTER 0 cglobal synth_filter_inner, 
0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ synth_buf, synth_buf2, window, out, off, scale %define scale m0 %if ARCH_X86_32 || WIN64 +%if cpuflag(sse2) movd scale, scalem +%else +movss scale, scalem +%endif ; Make sure offset is in a register and not on the stack %define OFFQ r4q %else %define OFFQ offq %endif -pshufdm0, m0, 0 +SPLATDm0 ; prepare inner counter limit 1 mov r5q, 480 sub r5q, offmp @@ -274,8 +295,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %endif .mainloop ; m1 = a m2 = b m3 = c m4 = d -pxor m3, m3 -pxor m4, m4 +SETZERO m3 +SETZERO m4 mova m1, [buf2 + i] mova m2, [buf2 + i + 16 * 4] %if ARCH_X86_32 @@ -292,8 +313,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %define ptr2 r7q ; must be loaded %define win r8q %define jr9q -pxor m9, m9 -pxor m10, m10 +SETZERO m9 +SETZERO m10 mova m7, [buf2 + i + mmsize] mova m8, [buf2 + i + mmsize + 16 * 4] lea win, [windowq + i] @@ -350,3 +371,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ subi, (ARCH_X86_64 + 1) * mmsize jge.mainloop RET +%endmacro + +%if ARCH_X86_32 +INIT_XMM sse +SYNTH_FILTER +%endif +INIT_XMM sse2 +SYNTH_FILTER diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 3821892..f8dd9b1 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -56,29 +56,39 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) } } -void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32], -const float window[512], -float out[32], intptr_t offset, float scale); +#define SYNTH_FILTER_FUNC(opt) \ +void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \ + const float window[512], \ + float out[32], intptr_t offset, float scale); \ +static void synth_filter_##opt(FFTContext *imdct, \ + float *synth_buf_ptr, int
[libav-devel] [PATCH 0/3] synth filter float ASM
Here are some extra implementations that extend Christophe's work. The first one (SSE) is only for x86_32 targets as x86_64 guarantees SSE2 is available. Second patch is an AVX implementation using ymm registers. In my tests it was about 30 cycles faster than SSE2 on a Sandy Bridge CPU. I don't have proper numbers for the third patch since I could only test on an AMD rig, where functions using ymm registers tend to have subpar performance. It still beat the AVX version by a decent margin, though, so Haswell should see a nice boost with it. I could add an FMA4 version using xmm registers, which would benefit AMD users unlike these AVX/FMA3 ymm ones. Thoughts? James Almer (3): x86/synth_filter: add synth_filter_fma3 x86/synth_filter: add synth_filter_sse x86/synth_filter: add synth_filter_avx libavcodec/x86/dcadsp.asm| 109 --- libavcodec/x86/dcadsp_init.c | 52 ++--- 2 files changed, 107 insertions(+), 54 deletions(-) -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/3] x86/synth_filter: add synth_filter_avx
Sandy Bridge Win64: 180 cycles in ff_synth_filter_inner_sse2 150 cycles in ff_synth_filter_inner_avx Also switch to a three operand format for some instructions to avoid assembly errors with Yasm 1.1.0 or older. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 76 +--- libavcodec/x86/dcadsp_init.c | 4 +++ 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 970ec3d..0d7c86e 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -200,18 +200,22 @@ DCA_LFE_FIR 0 DCA_LFE_FIR 1 %macro SETZERO 1 -%if cpuflag(sse2) +%if cpuflag(sse2) notcpuflag(avx) pxor %1, %1 %else xorps %1, %1, %1 %endif %endmacro -%macro SHUF 2 -%if cpuflag(sse2) -pshufd%1, %2, q0123 +%macro SHUF 3 +%if cpuflag(avx) +mova %3, [%2 - 16] +vperm2f128%1, %3, %3, 1 +vshufps %1, %1, %1, q0123 +%elif cpuflag(sse2) +pshufd%1, [%2], q0123 %else -mova %1, %2 +mova %1, [%2] shufps%1, %1, q0123 %endif %endmacro @@ -220,43 +224,43 @@ DCA_LFE_FIR 1 ; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i ;~ a += window[i + j] * (-synth_buf[15 - i + j]) ;~ b += window[i + j + 16] * (synth_buf[i + j]) -SHUF m5, [ptr2 + j + (15 - 3) * 4] +SHUF m5, ptr2 + j + (15 - 3) * 4, m6 mova m6, [ptr1 + j] %if ARCH_X86_64 -SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize] +SHUF m11, ptr2 + j + (15 - 3) * 4 - mmsize, m12 mova m12, [ptr1 + j + mmsize] %endif -mulps m6, [win + %1 + j + 16 * 4] -mulps m5, [win + %1 + j] +mulps m6, m6, [win + %1 + j + 16 * 4] +mulps m5, m5, [win + %1 + j] %if ARCH_X86_64 -mulpsm12, [win + %1 + j + mmsize + 16 * 4] -mulpsm11, [win + %1 + j + mmsize] +mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4] +mulpsm11, m11, [win + %1 + j + mmsize] %endif -addps m2, m6 -subps m1, m5 +addps m2, m2, m6 +subps m1, m1, m5 %if ARCH_X86_64 -addps m8, m12 -subps m7, m11 +addps m8, m8, m12 +subps m7, m7, m11 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) ;~ d += window[i + j + 48] * 
(synth_buf[31 - i + j]) -SHUF m6, [ptr2 + j + (31 - 3) * 4] +SHUF m6, ptr2 + j + (31 - 3) * 4, m5 mova m5, [ptr1 + j + 16 * 4] %if ARCH_X86_64 -SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize] +SHUF m12, ptr2 + j + (31 - 3) * 4 - mmsize, m11 mova m11, [ptr1 + j + mmsize + 16 * 4] %endif -mulps m5, [win + %1 + j + 32 * 4] -mulps m6, [win + %1 + j + 48 * 4] +mulps m5, m5, [win + %1 + j + 32 * 4] +mulps m6, m6, [win + %1 + j + 48 * 4] %if ARCH_X86_64 -mulpsm11, [win + %1 + j + mmsize + 32 * 4] -mulpsm12, [win + %1 + j + mmsize + 48 * 4] +mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4] +mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4] %endif -addps m3, m5 -addps m4, m6 +addps m3, m3, m5 +addps m4, m4, m6 %if ARCH_X86_64 -addps m9, m11 -addpsm10, m12 +addps m9, m9, m11 +addpsm10, m10, m12 %endif subj, 64 * 4 %endmacro @@ -269,17 +273,21 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ synth_buf, synth_buf2, window, out, off, scale %define scale m0 %if ARCH_X86_32 || WIN64 -%if cpuflag(sse2) +%if cpuflag(sse2) notcpuflag(avx) movd scale, scalem +SPLATDm0 %else -movss scale, scalem +VBROADCASTSS m0, scalem %endif ; Make sure offset is in a register and not on the stack %define OFFQ r4q %else +SPLATD xmm0 +%if cpuflag(avx) +vinsertf128 m0, m0, xmm0, 1 +%endif %define OFFQ offq %endif -SPLATDm0 ; prepare inner counter limit 1 mov r5q, 480 sub r5q, offmp @@ -346,11 +354,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %endif ;~ out[i] = a * scale; ;~ out[i + 16] = b * scale; -mulps m1, scale -mulps m2, scale +mulps m1, m1, scale +mulps m2, m2, scale %if ARCH_X86_64 -mulps m7, scale -mulps m8, scale +mulps m7, m7, scale +mulps m8, m8, scale %endif ;~ synth_buf2[i] = c; ;~ synth_buf2[i + 16] = d; @@ -379,3 +387,7 @@ SYNTH_FILTER %endif INIT_XMM sse2 SYNTH_FILTER +%if HAVE_AVX_EXTERNAL +INIT_YMM avx +SYNTH_FILTER +%endif diff --git a/libavcodec/x86
[libav-devel] [PATCH 3/3] x86/synth_filter: add synth_filter_fma3
Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 28 +++- libavcodec/x86/dcadsp_init.c | 4 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 0d7c86e..e1842ef 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -230,16 +230,12 @@ DCA_LFE_FIR 1 SHUF m11, ptr2 + j + (15 - 3) * 4 - mmsize, m12 mova m12, [ptr1 + j + mmsize] %endif -mulps m6, m6, [win + %1 + j + 16 * 4] +FMULADD_PSm2, m6, [win + %1 + j + 16 * 4], m2, m6 mulps m5, m5, [win + %1 + j] -%if ARCH_X86_64 -mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4] -mulpsm11, m11, [win + %1 + j + mmsize] -%endif -addps m2, m2, m6 subps m1, m1, m5 %if ARCH_X86_64 -addps m8, m8, m12 +FMULADD_PSm8, m12, [win + %1 + j + mmsize + 16 * 4], m8, m12 +mulpsm11, m11, [win + %1 + j + mmsize] subps m7, m7, m11 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) @@ -250,17 +246,11 @@ DCA_LFE_FIR 1 SHUF m12, ptr2 + j + (31 - 3) * 4 - mmsize, m11 mova m11, [ptr1 + j + mmsize + 16 * 4] %endif -mulps m5, m5, [win + %1 + j + 32 * 4] -mulps m6, m6, [win + %1 + j + 48 * 4] +FMULADD_PSm3, m5, [win + %1 + j + 32 * 4], m3, m5 +FMULADD_PSm4, m6, [win + %1 + j + 48 * 4], m4, m6 %if ARCH_X86_64 -mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4] -mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4] -%endif -addps m3, m3, m5 -addps m4, m4, m6 -%if ARCH_X86_64 -addps m9, m9, m11 -addpsm10, m10, m12 +FMULADD_PSm9, m11, [win + %1 + j + mmsize + 32 * 4], m9, m11 +FMULADD_PS m10, m12, [win + %1 + j + mmsize + 48 * 4], m10, m12 %endif subj, 64 * 4 %endmacro @@ -391,3 +381,7 @@ SYNTH_FILTER INIT_YMM avx SYNTH_FILTER %endif +%if HAVE_FMA3_EXTERNAL +INIT_YMM fma3 +SYNTH_FILTER +%endif diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index ab20635..132f75e 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -80,6 +80,7 @@ SYNTH_FILTER_FUNC(sse) #endif SYNTH_FILTER_FUNC(sse2) 
SYNTH_FILTER_FUNC(avx) +SYNTH_FILTER_FUNC(fma3) av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) { @@ -96,4 +97,7 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) if (EXTERNAL_AVX(cpu_flags)) { s-synth_filter_float = synth_filter_avx; } +if (EXTERNAL_FMA3(cpu_flags)) { +s-synth_filter_float = synth_filter_fma3; +} } -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] x86: dcadsp: Fix linking with yasm and optimizations disabled
On 04/03/14 3:48 PM, Diego Biurrun wrote: Some optimized functions reference optimized symbols, so the functions must be explicitly disabled when those symbols are unavailable. --- libavcodec/x86/dcadsp_init.c |4 1 file changed, 4 insertions(+) diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 3821892..0b9428a 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -60,6 +60,7 @@ void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32], const float window[512], float out[32], intptr_t offset, float scale); +#if HAVE_SSE2_EXTERNAL static void synth_filter_sse2(FFTContext *imdct, float *synth_buf_ptr, int *synth_buf_offset, float synth_buf2[32], const float window[512], @@ -74,12 +75,15 @@ static void synth_filter_sse2(FFTContext *imdct, *synth_buf_offset = (*synth_buf_offset - 32) 511; } +#endif /* HAVE_SSE2_EXTERNAL */ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) { +#if HAVE_SSE2_EXTERNAL int cpu_flags = av_get_cpu_flags(); if (EXTERNAL_SSE2(cpu_flags)) { s-synth_filter_float = synth_filter_sse2; } +#endif /* HAVE_SSE2_EXTERNAL */ } Most files use HAVE_YASM for this. It's more correct and allows the addition of other asm functions that don't depend on HAVE_SSE2_EXTERNAL. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] libx265: More API fixes
On 06/03/14 5:47 PM, Luca Barbato wrote: On 06/03/14 21:34, Reinhard Tartler wrote: Do we want this in release/10? Yes. The current stable version (x265 0.8) has X265_BUILD == 7. This change would make libav 10 only support the development branch, and most users and even distros usually prefer compiling using stable versions of every library. Wouldn't it be better to support both? A simple pre processor directive would be enough. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] avformat: more correct printf format specifiers
On 10/03/14 11:37 AM, Diego Biurrun wrote: --- libavformat/apetag.c |6 -- libavformat/asfdec.c |8 +--- libavformat/avidec.c |4 ++-- libavformat/bink.c | 10 +++--- libavformat/cafdec.c |5 - libavformat/crcenc.c |4 +++- libavformat/dfa.c|7 +-- libavformat/dxa.c|5 - libavformat/electronicarts.c |8 +--- libavformat/framecrcenc.c|4 +++- libavformat/gxf.c|6 +- libavformat/hnm.c| 11 +++ libavformat/iff.c|4 +++- libavformat/lxfdec.c |9 ++--- libavformat/matroskadec.c|3 ++- libavformat/mov.c|7 --- libavformat/mvi.c|5 - libavformat/mxfdec.c | 13 - libavformat/omadec.c |8 +--- libavformat/rmdec.c |4 +++- libavformat/rpl.c|4 ++-- libavformat/smacker.c|8 ++-- libavformat/smjpegdec.c |8 +--- libavformat/spdifenc.c |8 +--- libavformat/wtv.c|6 -- libavformat/xmv.c|6 +++--- 26 files changed, 114 insertions(+), 57 deletions(-) [...] @@ -539,14 +539,15 @@ static int mxf_read_partition_pack(void *arg, AVIOContext *pb, int tag, int size } if (partition-kag_size = 0 || partition-kag_size (1 20)) { -av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %i - guessing , partition-kag_size); +av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %PRId32 - guessing , + partition-kag_size); PRIi32? Ditto for any similar case. [...] @@ -501,7 +503,7 @@ static void get_tag(AVFormatContext *s, AVIOContext *pb, const char *key, int ty return; if (type == 0 length == 4) { -snprintf(buf, buf_size, %PRIi32, avio_rl32(pb)); +snprintf(buf, buf_size, %u, avio_rl32(pb)); Isn't this doing the opposite of what the patch was meant to do? ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] avformat: more correct printf format specifiers
On 10/03/14 11:55 PM, Tim Walker wrote: On 11 Mar 2014, at 03:38, James Almer jamr...@gmail.com wrote: On 10/03/14 11:37 AM, Diego Biurrun wrote: --- libavformat/apetag.c |6 -- libavformat/asfdec.c |8 +--- libavformat/avidec.c |4 ++-- libavformat/bink.c | 10 +++--- libavformat/cafdec.c |5 - libavformat/crcenc.c |4 +++- libavformat/dfa.c|7 +-- libavformat/dxa.c|5 - libavformat/electronicarts.c |8 +--- libavformat/framecrcenc.c|4 +++- libavformat/gxf.c|6 +- libavformat/hnm.c| 11 +++ libavformat/iff.c|4 +++- libavformat/lxfdec.c |9 ++--- libavformat/matroskadec.c|3 ++- libavformat/mov.c|7 --- libavformat/mvi.c|5 - libavformat/mxfdec.c | 13 - libavformat/omadec.c |8 +--- libavformat/rmdec.c |4 +++- libavformat/rpl.c|4 ++-- libavformat/smacker.c|8 ++-- libavformat/smjpegdec.c |8 +--- libavformat/spdifenc.c |8 +--- libavformat/wtv.c|6 -- libavformat/xmv.c|6 +++--- 26 files changed, 114 insertions(+), 57 deletions(-) [...] @@ -539,14 +539,15 @@ static int mxf_read_partition_pack(void *arg, AVIOContext *pb, int tag, int size } if (partition-kag_size = 0 || partition-kag_size (1 20)) { -av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %i - guessing , partition-kag_size); +av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %PRId32 - guessing , + partition-kag_size); PRIi32? Ditto for any similar case. Same result, but %d/%PRId32 are more commonly used than %i/%PRIi32 Fair enough. It was mostly a nit to keep the same specifier after expansion. [...] @@ -501,7 +503,7 @@ static void get_tag(AVFormatContext *s, AVIOContext *pb, const char *key, int ty return; if (type == 0 length == 4) { -snprintf(buf, buf_size, %PRIi32, avio_rl32(pb)); +snprintf(buf, buf_size, %u, avio_rl32(pb)); Isn't this doing the opposite of what the patch was meant to do? No, avio_rl32 returns unsigned int, not uint32_t, so %u is the correct specifier. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/4] lavc: Add private API to manipulate AVPacketList
On 13/08/13 11:49 PM, Luca Barbato wrote: --- libavcodec/avcodec.h | 5 + libavcodec/avpacket.c | 56 ++ libavcodec/internal.h | 36 libavformat/avformat.h | 6 -- 4 files changed, 97 insertions(+), 6 deletions(-) What's the status on this? I don't remember it ever being dropped and it certainly wasn't pushed. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/3] x86/synth_filter: add synth_filter_sse
On 14/03/14 7:56 AM, Christophe Gisquet wrote: Hi, 2014-03-04 3:25 GMT+01:00 James Almer jamr...@gmail.com: -INIT_XMM sse2 +%macro SETZERO 1 +%if cpuflag(sse2) +pxor %1, %1 +%else +xorps %1, %1, %1 +%endif +%endmacro + +%macro SHUF 2 +%if cpuflag(sse2) +pshufd%1, %2, q0123 +%else +mova %1, %2 +shufps%1, %1, q0123 +%endif +%endmacro We already discussed this, and indeed it is worth having SSE2 (integer) instructions instead of pure (float) SSE ones for the SSE2 version as they are actually faster. OK from me then for the asm. Not sure if the C part still applies cleanly, but this should be minor. It doesn't. I'll rebase and send the patchset again with some other changes later. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/3] x86/synth_filter: add synth_filter_fma3
On 14/03/14 8:02 AM, Christophe Gisquet wrote: Hi, 2014-03-04 3:25 GMT+01:00 James Almer jamr...@gmail.com: snip Don't know fma3 but this is a straightforward replacement of mul+add by a mac instruction. If the avx code is ok, I don't see how this wouldn't be. I just noticed I can replace the mul+sub instructions as well with a single fnmaddps, so I'll send an updated version with that change. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 5/6] truehd: break out part of output_data into platform-specific callback.
On 19/03/14 2:24 PM, Ben Avison wrote: diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h index bd864d9..7b7640e 100644 --- a/libavcodec/mlpdsp.h +++ b/libavcodec/mlpdsp.h @@ -23,6 +23,7 @@ #define AVCODEC_MLPDSP_H #include stdint.h +#include mlp.h void ff_mlp_rematrix_channel(int32_t *samples, const int32_t *coeffs, @@ -36,6 +37,15 @@ void ff_mlp_rematrix_channel(int32_t *samples, int access_unit_size_pow2, int32_t mask); +int32_t ff_mlp_pack_output(int32_t lossless_check_data, + int32_t (*sample_buffer)[MAX_CHANNELS], + void *data, + uint16_t blockpos, + uint8_t max_matrix_channel, + int is32, + uint8_t *ch_assign, + int8_t *output_shift); + typedef struct MLPDSPContext { void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff, int firorder, int iirorder, @@ -52,6 +62,18 @@ typedef struct MLPDSPContext { int matrix_noise_shift, int access_unit_size_pow2, int32_t mask); +int32_t (*(*mlp_select_pack_output)(uint8_t max_matrix_channel, +int is32, +uint8_t *ch_assign, +int8_t *output_shift))(int32_t, int32_t (*)[], void *, uint16_t, uint8_t, int, uint8_t*, int8_t *); +int32_t (*mlp_pack_output)(int32_t lossless_check_data, + int32_t (*sample_buffer)[MAX_CHANNELS], + void *data, + uint16_t blockpos, + uint8_t max_matrix_channel, + int is32, + uint8_t *ch_assign, + int8_t *output_shift); } MLPDSPContext; void ff_mlpdsp_init(MLPDSPContext *c); Please put pointers first if possible, like you did for mlp_rematrix_channel. Something like +int32_t (*mlp_pack_output)(int32_t (*sample_buffer)[MAX_CHANNELS], + void *data, + uint8_t *ch_assign, + int8_t *output_shift, + int32_t lossless_check_data, + uint16_t blockpos, + uint8_t max_matrix_channel, + int is32); ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 3/3 v2] x86/synth_filter: add synth_filter_fma3
Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 22 ++ libavcodec/x86/dcadsp_init.c | 6 ++ 2 files changed, 28 insertions(+) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 662cb96..59d96bf 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -230,6 +230,14 @@ DCA_LFE_FIR 1 SHUF m11, ptr2 + j + (15 - 3) * 4 - mmsize, m12 mova m12, [ptr1 + j + mmsize] %endif +%if cpuflag(fma3) +fmaddps m2, m6, [win + %1 + j + 16 * 4], m2 +fnmaddps m1, m5, [win + %1 + j], m1 +%if ARCH_X86_64 +fmaddps m8, m12, [win + %1 + j + mmsize + 16 * 4], m8 +fnmaddps m7, m11, [win + %1 + j + mmsize], m7 +%endif +%else ; non-FMA mulps m6, m6, [win + %1 + j + 16 * 4] mulps m5, m5, [win + %1 + j] %if ARCH_X86_64 @@ -242,6 +250,7 @@ DCA_LFE_FIR 1 addps m8, m8, m12 subps m7, m7, m11 %endif +%endif ; cpuflag(fma3) ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) ;~ d += window[i + j + 48] * (synth_buf[31 - i + j]) SHUF m6, ptr2 + j + (31 - 3) * 4, m5 @@ -250,6 +259,14 @@ DCA_LFE_FIR 1 SHUF m12, ptr2 + j + (31 - 3) * 4 - mmsize, m11 mova m11, [ptr1 + j + mmsize + 16 * 4] %endif +%if cpuflag(fma3) +fmaddps m3, m5, [win + %1 + j + 32 * 4], m3 +fmaddps m4, m6, [win + %1 + j + 48 * 4], m4 +%if ARCH_X86_64 +fmaddps m9, m11, [win + %1 + j + mmsize + 32 * 4], m9 +fmaddps m10, m12, [win + %1 + j + mmsize + 48 * 4], m10 +%endif +%else ; non-FMA mulps m5, m5, [win + %1 + j + 32 * 4] mulps m6, m6, [win + %1 + j + 48 * 4] %if ARCH_X86_64 @@ -262,6 +279,7 @@ DCA_LFE_FIR 1 addps m9, m9, m11 addpsm10, m10, m12 %endif +%endif ; cpuflag(fma3) subj, 64 * 4 %endmacro @@ -400,3 +418,7 @@ INIT_XMM sse2 SYNTH_FILTER INIT_YMM avx SYNTH_FILTER +%if HAVE_FMA3_EXTERNAL +INIT_YMM fma3 +SYNTH_FILTER +%endif diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index d7e0d65..beef288 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -82,6 +82,9 @@ SYNTH_FILTER_FUNC(sse) #endif SYNTH_FILTER_FUNC(sse2) 
SYNTH_FILTER_FUNC(avx) +#if HAVE_FMA3_EXTERNAL +SYNTH_FILTER_FUNC(fma3) +#endif #endif /* HAVE_YASM */ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) @@ -100,5 +103,8 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) if (EXTERNAL_AVX(cpu_flags)) { s-synth_filter_float = synth_filter_avx; } +if (EXTERNAL_FMA3(cpu_flags)) { +s-synth_filter_float = synth_filter_fma3; +} #endif /* HAVE_YASM */ } -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/3 v2] x86/synth_filter: add synth_filter_avx
Sandy Bridge Win64: 180 cycles in ff_synth_filter_inner_sse2 150 cycles in ff_synth_filter_inner_avx Also switch some instructions to a three operand format to avoid assembly errors with Yasm 1.1.0 or older. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 85 +++- libavcodec/x86/dcadsp_init.c | 4 +++ 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 970ec3d..662cb96 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -200,18 +200,22 @@ DCA_LFE_FIR 0 DCA_LFE_FIR 1 %macro SETZERO 1 -%if cpuflag(sse2) +%if cpuflag(sse2) notcpuflag(avx) pxor %1, %1 %else xorps %1, %1, %1 %endif %endmacro -%macro SHUF 2 -%if cpuflag(sse2) -pshufd%1, %2, q0123 +%macro SHUF 3 +%if cpuflag(avx) +mova %3, [%2 - 16] +vperm2f128%1, %3, %3, 1 +vshufps %1, %1, %1, q0123 +%elif cpuflag(sse2) +pshufd%1, [%2], q0123 %else -mova %1, %2 +mova %1, [%2] shufps%1, %1, q0123 %endif %endmacro @@ -220,43 +224,43 @@ DCA_LFE_FIR 1 ; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i ;~ a += window[i + j] * (-synth_buf[15 - i + j]) ;~ b += window[i + j + 16] * (synth_buf[i + j]) -SHUF m5, [ptr2 + j + (15 - 3) * 4] +SHUF m5, ptr2 + j + (15 - 3) * 4, m6 mova m6, [ptr1 + j] %if ARCH_X86_64 -SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize] +SHUF m11, ptr2 + j + (15 - 3) * 4 - mmsize, m12 mova m12, [ptr1 + j + mmsize] %endif -mulps m6, [win + %1 + j + 16 * 4] -mulps m5, [win + %1 + j] +mulps m6, m6, [win + %1 + j + 16 * 4] +mulps m5, m5, [win + %1 + j] %if ARCH_X86_64 -mulpsm12, [win + %1 + j + mmsize + 16 * 4] -mulpsm11, [win + %1 + j + mmsize] +mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4] +mulpsm11, m11, [win + %1 + j + mmsize] %endif -addps m2, m6 -subps m1, m5 +addps m2, m2, m6 +subps m1, m1, m5 %if ARCH_X86_64 -addps m8, m12 -subps m7, m11 +addps m8, m8, m12 +subps m7, m7, m11 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) ;~ d += window[i + j + 48] * 
(synth_buf[31 - i + j]) -SHUF m6, [ptr2 + j + (31 - 3) * 4] +SHUF m6, ptr2 + j + (31 - 3) * 4, m5 mova m5, [ptr1 + j + 16 * 4] %if ARCH_X86_64 -SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize] +SHUF m12, ptr2 + j + (31 - 3) * 4 - mmsize, m11 mova m11, [ptr1 + j + mmsize + 16 * 4] %endif -mulps m5, [win + %1 + j + 32 * 4] -mulps m6, [win + %1 + j + 48 * 4] +mulps m5, m5, [win + %1 + j + 32 * 4] +mulps m6, m6, [win + %1 + j + 48 * 4] %if ARCH_X86_64 -mulpsm11, [win + %1 + j + mmsize + 32 * 4] -mulpsm12, [win + %1 + j + mmsize + 48 * 4] +mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4] +mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4] %endif -addps m3, m5 -addps m4, m6 +addps m3, m3, m5 +addps m4, m4, m6 %if ARCH_X86_64 -addps m9, m11 -addpsm10, m12 +addps m9, m9, m11 +addpsm10, m10, m12 %endif subj, 64 * 4 %endmacro @@ -269,25 +273,34 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ synth_buf, synth_buf2, window, out, off, scale %define scale m0 %if ARCH_X86_32 || WIN64 -%if cpuflag(sse2) +%if cpuflag(sse2) notcpuflag(avx) movd scale, scalem +SPLATDm0 %else -movss scale, scalem +VBROADCASTSS m0, scalem %endif ; Make sure offset is in a register and not on the stack %define OFFQ r4q %else +SPLATD xmm0 +%if cpuflag(avx) +vinsertf128 m0, m0, xmm0, 1 +%endif %define OFFQ offq %endif -SPLATDm0 ; prepare inner counter limit 1 mov r5q, 480 sub r5q, offmp and r5q, -64 shl r5q, 2 +%if ARCH_X86_32 || notcpuflag(avx) mov OFFQ, r5q %define ir5q movi, 16 * 4 - (ARCH_X86_64 + 1) * mmsize ; main loop counter +%else +%define i 0 +%define OFFQ r5q +%endif %define buf2 synth_buf2q %if ARCH_X86_32 @@ -306,8 +319,10 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %define jr3q mov win, windowm mov ptr1, synth_bufm +%if ARCH_X86_32 || notcpuflag(avx) add win, i add ptr1, i +%endif %else ; ARCH_X86_64 %define ptr1 r6q %define ptr2 r7q ; must be loaded
[libav-devel] [PATCH 1/3 v2] x86/synth_filter: add synth_filter_sse
Build only on x86_32 targets. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 55 +--- libavcodec/x86/dcadsp_init.c | 45 ++-- 2 files changed, 70 insertions(+), 30 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 56039ba..970ec3d 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -199,15 +199,31 @@ INIT_XMM sse DCA_LFE_FIR 0 DCA_LFE_FIR 1 -INIT_XMM sse2 +%macro SETZERO 1 +%if cpuflag(sse2) +pxor %1, %1 +%else +xorps %1, %1, %1 +%endif +%endmacro + +%macro SHUF 2 +%if cpuflag(sse2) +pshufd%1, %2, q0123 +%else +mova %1, %2 +shufps%1, %1, q0123 +%endif +%endmacro + %macro INNER_LOOP 1 ; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i ;~ a += window[i + j] * (-synth_buf[15 - i + j]) ;~ b += window[i + j + 16] * (synth_buf[i + j]) -pshufdm5, [ptr2 + j + (15 - 3) * 4], q0123 +SHUF m5, [ptr2 + j + (15 - 3) * 4] mova m6, [ptr1 + j] %if ARCH_X86_64 -pshufd m11, [ptr2 + j + (15 - 3) * 4 - mmsize], q0123 +SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize] mova m12, [ptr1 + j + mmsize] %endif mulps m6, [win + %1 + j + 16 * 4] @@ -224,10 +240,10 @@ INIT_XMM sse2 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) ;~ d += window[i + j + 48] * (synth_buf[31 - i + j]) -pshufdm6, [ptr2 + j + (31 - 3) * 4], q0123 +SHUF m6, [ptr2 + j + (31 - 3) * 4] mova m5, [ptr1 + j + 16 * 4] %if ARCH_X86_64 -pshufd m12, [ptr2 + j + (31 - 3) * 4 - mmsize], q0123 +SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize] mova m11, [ptr1 + j + mmsize + 16 * 4] %endif mulps m5, [win + %1 + j + 32 * 4] @@ -245,20 +261,25 @@ INIT_XMM sse2 subj, 64 * 4 %endmacro -; void ff_synth_filter_inner_sse2(float *synth_buf, float synth_buf2[32], -; const float window[512], float out[32], -; intptr_t offset, float scale) +; void ff_synth_filter_inner_opt(float *synth_buf, float synth_buf2[32], +; const float window[512], float out[32], +; intptr_t offset, float scale) +%macro SYNTH_FILTER 0 cglobal synth_filter_inner, 
0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ synth_buf, synth_buf2, window, out, off, scale %define scale m0 %if ARCH_X86_32 || WIN64 +%if cpuflag(sse2) movd scale, scalem +%else +movss scale, scalem +%endif ; Make sure offset is in a register and not on the stack %define OFFQ r4q %else %define OFFQ offq %endif -pshufdm0, m0, 0 +SPLATDm0 ; prepare inner counter limit 1 mov r5q, 480 sub r5q, offmp @@ -274,8 +295,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %endif .mainloop ; m1 = a m2 = b m3 = c m4 = d -pxor m3, m3 -pxor m4, m4 +SETZERO m3 +SETZERO m4 mova m1, [buf2 + i] mova m2, [buf2 + i + 16 * 4] %if ARCH_X86_32 @@ -292,8 +313,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %define ptr2 r7q ; must be loaded %define win r8q %define jr9q -pxor m9, m9 -pxor m10, m10 +SETZERO m9 +SETZERO m10 mova m7, [buf2 + i + mmsize] mova m8, [buf2 + i + mmsize + 16 * 4] lea win, [windowq + i] @@ -350,3 +371,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ subi, (ARCH_X86_64 + 1) * mmsize jge.mainloop RET +%endmacro + +%if ARCH_X86_32 +INIT_XMM sse +SYNTH_FILTER +%endif +INIT_XMM sse2 +SYNTH_FILTER diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 65e3db5..5b77985 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -56,25 +56,31 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) } } -void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32], -const float window[512], -float out[32], intptr_t offset, float scale); -#if HAVE_YASM -static void synth_filter_sse2(FFTContext *imdct, - float *synth_buf_ptr, int *synth_buf_offset, - float synth_buf2[32], const float window[512], - float out[32], const float in[32], float scale) -{ -float *synth_buf= synth_buf_ptr + *synth_buf_offset; - -imdct-imdct_half(imdct, synth_buf, in); +#define SYNTH_FILTER_FUNC(opt
[libav-devel] [PATCH 0/3 v2] synth filter float ASM
Here are some extra implementations that extend Christophe's work. Differences with v1: * AVX/FMA3: Removed the main loop and related bookkeeping for x64 since said loop would be run only once anyway. * FMA3: Replaced mulps+subps with FMA3 instructions, meaning two fewer instructions run per loop in that version. * Removed some unnecessary preprocessor guards and added some missing ones. Knowing that currently AMD has lackluster performance with ymm registers I could add an FMA4 version of this function using xmm registers, which would benefit said processors unlike the AVX/FMA3 ymm ones. Thoughts? James Almer (3): x86/synth_filter: add synth_filter_sse x86/synth_filter: add synth_filter_avx x86/synth_filter: add synth_filter_fma3 libavcodec/x86/dcadsp.asm| 138 --- libavcodec/x86/dcadsp_init.c | 55 +++-- 2 files changed, 143 insertions(+), 50 deletions(-) -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/3 v2] x86/synth_filter: add synth_filter_fma3
On 24/03/14 11:28 AM, Diego Biurrun wrote: On Thu, Mar 20, 2014 at 03:37:56PM -0300, James Almer wrote: --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -230,6 +230,14 @@ DCA_LFE_FIR 1 mova m12, [ptr1 + j + mmsize] %endif +%if cpuflag(fma3) +fmaddps m2, m6, [win + %1 + j + 16 * 4], m2 +fnmaddps m1, m5, [win + %1 + j], m1 +%if ARCH_X86_64 +fmaddps m8, m12, [win + %1 + j + mmsize + 16 * 4], m8 +fnmaddps m7, m11, [win + %1 + j + mmsize], m7 +%endif +%else ; non-FMA Doesn't FMA3 imply x86_64? Diego No, no simd extension so far implies x86_64. Not even the upcoming AVX512. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/3 v2] x86/synth_filter: add synth_filter_fma3
On 24/03/14 3:07 PM, Diego Biurrun wrote: On Mon, Mar 24, 2014 at 02:59:08PM -0300, James Almer wrote: On 24/03/14 11:28 AM, Diego Biurrun wrote: On Thu, Mar 20, 2014 at 03:37:56PM -0300, James Almer wrote: --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -230,6 +230,14 @@ DCA_LFE_FIR 1 mova m12, [ptr1 + j + mmsize] %endif +%if cpuflag(fma3) +fmaddps m2, m6, [win + %1 + j + 16 * 4], m2 +fnmaddps m1, m5, [win + %1 + j], m1 +%if ARCH_X86_64 +fmaddps m8, m12, [win + %1 + j + mmsize + 16 * 4], m8 +fnmaddps m7, m11, [win + %1 + j + mmsize], m7 +%endif +%else ; non-FMA Doesn't FMA3 imply x86_64? No, no SIMD extension so far implies x86_64. Not even the upcoming AVX512. But which modern x86 SIMD extensions are available on x86_32? Diego All of them so far work on both x86_32 and x86_64, with the usual limitations for the former (8 general purpose registers and 8 SIMD registers). As I said, even AVX512, which hasn't been realized in hardware yet, will also be available for x86_32. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/3 v2] x86/synth_filter: add synth_filter_sse
On 28/03/14 4:15 PM, Jason Garrett-Glaser wrote: On Thu, Mar 20, 2014 at 11:37 AM, James Almer jamr...@gmail.com wrote: Build only on x86_32 targets. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 55 +--- libavcodec/x86/dcadsp_init.c | 45 ++-- 2 files changed, 70 insertions(+), 30 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 56039ba..970ec3d 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -199,15 +199,31 @@ INIT_XMM sse DCA_LFE_FIR 0 DCA_LFE_FIR 1 -INIT_XMM sse2 +%macro SETZERO 1 +%if cpuflag(sse2) +pxor %1, %1 +%else +xorps %1, %1, %1 +%endif +%endmacro Is there some reason we can't just use xorps here for all versions? I mean, it is float data, right? %if ARCH_X86_32 || WIN64 +%if cpuflag(sse2) movd scale, scalem +%else +movss scale, scalem +%endif Same here; does this need to be ifdeffed? Otherwise looks okay. Jason You're right that it's all float data, but both Christophe and I tested and xorps/shufps was a bit slower than pxor/pshufd (At least in my tests it was about five cycles slower), so i decided to use some ifdeffery to keep the SSE2 version intact. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/3 v2] x86/synth_filter: add synth_filter_avx
On 28/03/14 4:16 PM, Jason Garrett-Glaser wrote: On Thu, Mar 20, 2014 at 11:37 AM, James Almer jamr...@gmail.com wrote: Sandy Bridge Win64: 180 cycles in ff_synth_filter_inner_sse2 150 cycles in ff_synth_filter_inner_avx Also switch some instructions to a three operand format to avoid assembly errors with Yasm 1.1.0 or older. If this is an issue, could we possibly resolve it in x86inc.asm instead of uglifying the asm? Jason Pretty much every AVX function in the tree is using the three-operand format to work around this problem, so it will certainly be welcomed if someone fixes it. And I don't think it uglifies the asm that much. Besides, with the addition of the FMA3 version it ends up having a nice vertical alignment. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] Add gen-rc tool for generating Windows resource files
On 30/03/14 10:49 AM, Vittorio Giovara wrote: From: Timothy Gu timothyg...@gmail.com --- Anyone willing to test this on Windows would be welcome. Vittorio tools/gen-rc | 122 +++ 1 file changed, 122 insertions(+) create mode 100755 tools/gen-rc diff --git a/tools/gen-rc b/tools/gen-rc new file mode 100755 index 000..269f2f6 --- /dev/null +++ b/tools/gen-rc @@ -0,0 +1,122 @@ +#!/bin/sh +# +# Copyright (c) 2012 James Almer +# Copyright (c) 2013 Tiancheng Timothy Gu +# +# This file is part of Libav. +# +# Libav is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# Libav is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Libav; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +## Help +die() { +cat EOF 2 +This script is used to generate Windows resources file for the Libav libraries. +The output .rc file is to be compiled by windres(1). It is mainly useful for +Libav developers to tweak and regenerate all resources files at once. + +Usage: $0 libname comment + +The script will output the file to 'libname/libname-without-libres.rc'. 
+ +Example: $0 libavcodec 'Libav codecs library' +EOF +exit 1 +} + +# Script to generate all: +# (to remove prefix '# ' and add 'tools/' as prefix: sed -r 's/^.{2}/tools\//') +# gen-rc libavutil Libav utility library +# gen-rc libavcodecLibav codec library +# gen-rc libavformat Libav container format library +# gen-rc libavdevice Libav device handling library +# gen-rc libavfilter Libav audio/video filtering library +# gen-rc libavresample Libav audio resampling library +# gen-rc libswscaleLibav image rescaling library + +## Sanity checks and argument parsing +if test $# -lt 2 || test $# -gt 3; then +die +fi + +name=$1 +shortname=${name#lib} +comment=$2 +capname=`echo $name | awk '{print toupper($0)}'` +version=${capname}_VERSION + +mkdir -p $name +output=$name/${shortname}res.rc + +## REAL magic +cat EOF $output +/* + * Windows resource file for $name + * + * Copyright (C) 2012 James Almer + * Copyright (C) 2013 Tiancheng Timothy Gu + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include config.h + +#include windows.h + +#include libavutil/version.h + +#include $name/version.h + +1 VERSIONINFO +FILEVERSION ${version}_MAJOR, ${version}_MINOR, ${version}_MICRO, 0 +PRODUCTVERSION ${version}_MAJOR, ${version}_MINOR, ${version}_MICRO, 0 +FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +FILEOS VOS_NT_WINDOWS32 +FILETYPEVFT_DLL +{ +BLOCK StringFileInfo +{ +BLOCK 040904B0 +{ +VALUE CompanyName, Libav Project +VALUE FileDescription, $comment +VALUE FileVersion, AV_STRINGIFY($version) +VALUE InternalName, $name +VALUE LegalCopyright, Copyright (C) 2000- AV_STRINGIFY(CONFIG_THIS_YEAR) Libav Project This will fail because the commit adding CONFIG_THIS_YEAR is not in the tree. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] x86/synth_filter: remove the fma3 version ifdefs
This fixes compilation failures with --disable-fma3 Signed-off-by: James Almer jamr...@gmail.com --- See https://fate.libav.org/x86_32-linux-suncc-nosse/20140405142549 libavcodec/x86/dcadsp.asm| 2 -- libavcodec/x86/dcadsp_init.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 59d96bf..c42ee23 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -418,7 +418,5 @@ INIT_XMM sse2 SYNTH_FILTER INIT_YMM avx SYNTH_FILTER -%if HAVE_FMA3_EXTERNAL INIT_YMM fma3 SYNTH_FILTER -%endif diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index beef288..9acb818 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -82,9 +82,7 @@ SYNTH_FILTER_FUNC(sse) #endif SYNTH_FILTER_FUNC(sse2) SYNTH_FILTER_FUNC(avx) -#if HAVE_FMA3_EXTERNAL SYNTH_FILTER_FUNC(fma3) -#endif #endif /* HAVE_YASM */ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] x86/synth_filter: remove the fma3 version ifdefs
On 05/04/14 5:20 PM, Diego Biurrun wrote: On Sat, Apr 05, 2014 at 02:00:53PM -0300, James Almer wrote: This fixes compilation failures with --disable-fma3 Signed-off-by: James Almer jamr...@gmail.com --- See https://fate.libav.org/x86_32-linux-suncc-nosse/20140405142549 libavcodec/x86/dcadsp.asm| 2 -- libavcodec/x86/dcadsp_init.c | 2 -- 2 files changed, 4 deletions(-) Hmm, I cannot reproduce this with gcc ... Diego I can reproduce it with mingw-w64 4.8.2, configuring with --disable-fma3 then running make libavcodec/x86/dcadsp_init.o. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] matroskaenc: Allow VP9 and Opus in webm
On 31/05/14 4:23 AM, Anton Khirnov wrote: From: Tudor Suciu tudor.su...@gmail.com Signed-off-by: Anton Khirnov an...@khirnov.net --- The webm official page does not mention that, but Google seems to claim those are now officially supported. Nice to see they are keeping all the matroska traditions. --- It's official, they just haven't released a stable version of libwebm with the added support just yet. http://git.chromium.org/gitweb/?p=webm/libwebm.git;a=commitdiff;h=5efd6e3c1df766c08294ad19168e71522ee0d808 Not to mention both Gecko and Chromium based OS already can play VP9 webm files. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/6] Add av_dict_version() to make it quick to check if a dictionary has changed.
On 04/06/14 3:33 PM, Andrew Stone wrote: By comparing versions of dictionaries, it's possible to detect if metadata has changed. --- libavutil/dict.c | 8 libavutil/dict.h | 10 ++ 2 files changed, 18 insertions(+) Maybe revision is a better name to avoid confusion with functions and defines that return actual version numbers, like those from the libraries. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 15/15] hevc: Add x86 optimized idct functions
On 24/06/14 11:26 AM, Luca Barbato wrote: From: Pierre Edouard Lepere pierre-edouard.lep...@insa-rennes.fr Signed-off-by: Luca Barbato lu_z...@gentoo.org --- libavcodec/hevc.c | 19 - libavcodec/hevcdsp.c | 5 ++ libavcodec/hevcdsp.h | 4 +- libavcodec/hevcdsp_template.c | 176 +++-- libavcodec/x86/Makefile | 3 +- libavcodec/x86/hevc_idct.asm | 180 ++ libavcodec/x86/hevcdsp.h | 18 + libavcodec/x86/hevcdsp_init.c | 62 +++ 8 files changed, 349 insertions(+), 118 deletions(-) create mode 100644 libavcodec/x86/hevc_idct.asm [...] +%macro DC_ADD_INIT 2 +add %1w, ((1 14-8) + 1) +sar %1w, (15-8) +movd m0, %1 movd m0, %1d NASM x86_64 will complain otherwise. +lea %1, [%2*3] +SPLATWm0, m0, 0 +pxor m1, m1 +psubw m1, m0 +packuswb m0, m0 +packuswb m1, m1 +%endmacro ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 06/15] hevc: Add SSE4 MC functions
On 24/06/14 11:26 AM, Luca Barbato wrote: From: Pierre Edouard Lepere pierre-edouard.lep...@insa-rennes.fr The functions only support x86_64. Fixes from Hendrik Leppkes and James Almer Signed-off-by: Luca Barbato lu_z...@gentoo.org --- libavcodec/hevcdsp.c |6 +- libavcodec/hevcdsp.h |3 + libavcodec/x86/Makefile |2 + libavcodec/x86/hevc_mc.asm| 1256 + libavcodec/x86/hevcdsp.h | 164 ++ libavcodec/x86/hevcdsp_init.c | 373 6 files changed, 1803 insertions(+), 1 deletion(-) create mode 100644 libavcodec/x86/hevc_mc.asm create mode 100644 libavcodec/x86/hevcdsp.h create mode 100644 libavcodec/x86/hevcdsp_init.c Many of these functions are SSSE3 and a couple even SSE2 at most. It will require some init macros rewriting to change, but leaving things as is will make atom, conroe and bobcat cpus miss a considerable performance boost. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 06/15] hevc: Add SSE4 MC functions
On 25/06/14 3:44 PM, Luca Barbato wrote: On 25/06/14 20:33, James Almer wrote: On 24/06/14 11:26 AM, Luca Barbato wrote: From: Pierre Edouard Lepere pierre-edouard.lep...@insa-rennes.fr The functions only support x86_64. Fixes from Hendrik Leppkes and James Almer Signed-off-by: Luca Barbato lu_z...@gentoo.org --- libavcodec/hevcdsp.c |6 +- libavcodec/hevcdsp.h |3 + libavcodec/x86/Makefile |2 + libavcodec/x86/hevc_mc.asm| 1256 + libavcodec/x86/hevcdsp.h | 164 ++ libavcodec/x86/hevcdsp_init.c | 373 6 files changed, 1803 insertions(+), 1 deletion(-) create mode 100644 libavcodec/x86/hevc_mc.asm create mode 100644 libavcodec/x86/hevcdsp.h create mode 100644 libavcodec/x86/hevcdsp_init.c Many of these functions are SSSE3 and a couple even SSE2 at most. Can you guide me in this regard? The SSE4 functions are those using pextrw (with memory operand) and packusdw. hevc_put_hevc_bi_w2_{8,10} hevc_put_hevc_bi_w4_{8,10} hevc_put_hevc_bi_w6_{8,10} hevc_put_hevc_bi_w8_{8,10} hevc_put_hevc_uni_w2_{8,10} hevc_put_hevc_uni_w4_{8,10} hevc_put_hevc_uni_w6_{8,10} hevc_put_hevc_uni_w8_{8,10} hevc_put_hevc_uni_qpel_v{4,8}_10 hevc_put_hevc_uni_qpel_hv2_{8,10} hevc_put_hevc_uni_qpel_hv4_{8,10} hevc_put_hevc_uni_qpel_hv6_{8,10} hevc_put_hevc_uni_qpel_hv8_{8,10} hevc_put_hevc_uni_pel_pixels{2,6}_8 hevc_put_hevc_bi_pel_pixels{2,6}_8 hevc_put_hevc_{uni,bi}_epel_h2_8 hevc_put_hevc_{uni,bi}_epel_v2_8 hevc_put_hevc_{uni,bi}_epel_h6_8 hevc_put_hevc_{uni,bi}_epel_v6_8 hevc_put_hevc_{uni,bi}_epel_hv{2,6}_8 I think I'm not missing any. both instructions can be emulated using sse2, so the relevant functions could be duplicated to create an SSE2/SSSE3 variant, but that's for another time/patch. The rest are mostly SSSE3 because of pmaddubsw and pmulhrsw, and a few only SSE2. The qpel and epel tables also need to be renamed to remove the sse4 suffix (Which is unneeded). 
It will require some init macros rewriting to change, but leaving things as is will make atom, conroe and bobcat cpus miss a considerable performance boost. Probably I can do myself but your help would be welcome =) I don't have time nor really want to deal with the init macros, but i can help you with the necessary changes to the asm file if needed. lu ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 06/15] hevc: Add SSE4 MC functions
--- This applies cleanly after PATCH 14/15, and of course requires relevant changes to hevc_init.c I think i got every function right, but in any case fixing any of them is a single line change. In the end, out of 190 functions, only 44 were SSE4. libavcodec/x86/hevc_mc.asm | 363 +++-- 1 file changed, 281 insertions(+), 82 deletions(-) diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm index dac3295..4696fa8 100644 --- a/libavcodec/x86/hevc_mc.asm +++ b/libavcodec/x86/hevc_mc.asm @@ -30,8 +30,8 @@ zero: times 4 dd 0 one_per_32: times 4 dd 1 SECTION .text -%macro EPEL_TABLE 4 -hevc_epel_filters_%4_%1 times %2 d%3 -2, 58 +%macro EPEL_TABLE 3 +hevc_epel_filters_%1 times %2 d%3 -2, 58 times %2 d%3 10, -2 times %2 d%3 -4, 54 times %2 d%3 16, -2 @@ -49,11 +49,11 @@ hevc_epel_filters_%4_%1 times %2 d%3 -2, 58 -EPEL_TABLE 8, 8, b, sse4 -EPEL_TABLE 10, 4, w, sse4 +EPEL_TABLE 8, 8, b +EPEL_TABLE 10, 4, w -%macro QPEL_TABLE 4 -hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4 +%macro QPEL_TABLE 3 +hevc_qpel_filters_%1 times %2 d%3 -1, 4 times %2 d%3 -10, 58 times %2 d%3 17, -5 times %2 d%3 1, 0 @@ -67,10 +67,10 @@ hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4 times %2 d%3 4, -1 %endmacro -QPEL_TABLE 8, 8, b, sse4 -QPEL_TABLE 10, 4, w, sse4 +QPEL_TABLE 8, 8, b +QPEL_TABLE 10, 4, w -%define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10 +%define hevc_qpel_filters_14 hevc_qpel_filters_10 %if ARCH_X86_64 @@ -114,9 +114,9 @@ QPEL_TABLE 10, 4, w, sse4 %macro EPEL_FILTER 2-4; bit depth, filter index %ifdef PIC -lea rfilterq, [hevc_epel_filters_sse4_%1] +lea rfilterq, [hevc_epel_filters_%1] %else -%define rfilterq hevc_epel_filters_sse4_%1 +%define rfilterq hevc_epel_filters_%1 %endif sub %2q, 1 shl %2q, 5 ; multiply by 32 @@ -131,9 +131,9 @@ QPEL_TABLE 10, 4, w, sse4 %macro EPEL_HV_FILTER 1 %ifdef PIC -lea rfilterq, [hevc_epel_filters_sse4_%1] +lea rfilterq, [hevc_epel_filters_%1] %else -%define rfilterq hevc_epel_filters_sse4_%1 +%define rfilterq hevc_epel_filters_%1 
%endif sub mxq, 1 sub myq, 1 @@ -144,9 +144,9 @@ QPEL_TABLE 10, 4, w, sse4 lea r3srcq, [srcstrideq*3] %ifdef PIC -lea rfilterq, [hevc_epel_filters_sse4_10] +lea rfilterq, [hevc_epel_filters_10] %else -%define rfilterq hevc_epel_filters_sse4_10 +%define rfilterq hevc_epel_filters_10 %endif movdqa m12, [rfilterq + myq]; get 2 first values of filters movdqa m13, [rfilterq + myq+16] ; get 2 last values of filters @@ -154,9 +154,9 @@ QPEL_TABLE 10, 4, w, sse4 %macro QPEL_FILTER 2 %ifdef PIC -lea rfilterq, [hevc_qpel_filters_sse4_%1] +lea rfilterq, [hevc_qpel_filters_%1] %else -%define rfilterq hevc_qpel_filters_sse4_%1 +%define rfilterq hevc_qpel_filters_%1 %endif lea %2q, [%2q*8-8] movdqa m12, [rfilterq + %2q*8] ; get 4 first values of filters @@ -389,9 +389,9 @@ QPEL_TABLE 10, 4, w, sse4 %macro QPEL_HV_COMPUTE 4 ; width, bitdepth, filter idx %ifdef PIC -lea rfilterq, [hevc_qpel_filters_sse4_%2] +lea rfilterq, [hevc_qpel_filters_%2] %else -%define rfilterq hevc_qpel_filters_sse4_%2 +%define rfilterq hevc_qpel_filters_%2 %endif %if %2 == 8 @@ -498,7 +498,6 @@ QPEL_TABLE 10, 4, w, sse4 %endif %endmacro -INIT_XMM sse4; adds ff_ and _sse4 to function name ; ** ; void put_hevc_mc_pixels(int16_t *dst, ptrdiff_t dststride, ; uint8_t *_src, ptrdiff_t _srcstride, @@ -514,7 +513,9 @@ cglobal hevc_put_hevc_pel_pixels%1_%2, 5, 5, 3, dst, dststride, src, srcstride,h PEL_10STORE%1 dstq, m0, m1 LOOP_END dst, dststride, src, srcstride RET +%endmacro +%macro HEVC_PUT_HEVC_UNI_PEL_PIXELS 2 cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 3, dst, dststride, src, srcstride,height pxor m2, m2 .loop @@ -525,7 +526,9 @@ cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 3, dst, dststride, src, srcstri dec heightd ; cmp height jnz .loop ; height loop RET +%endmacro +%macro HEVC_PUT_HEVC_BI_PEL_PIXELS 2 cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 7, 7, 6, dst, dststride, src, srcstride, src2, src2stride,height pxor m2, m2 movdqam5, [pw_bi_%2] @@ -541,9 +544,44 @@ cglobal 
hevc_put_hevc_bi_pel_pixels%1_%2, 7, 7, 6, dst,
Re: [libav-devel] [PATCH] mov: Mark a variable as unused
On 07/07/14 4:08 PM, Martin Storsjö wrote: This silences a warning with gcc. --- In my defense, clang didn't show this warning. --- libavformat/mov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/mov.c b/libavformat/mov.c index 4a2d265..853c3e5 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -213,7 +213,7 @@ static int mov_metadata_loci(MOVContext *c, AVIOContext *pb, unsigned len) char language[4] = { 0 }; char buf[100]; uint16_t langcode = 0; -double longitude, latitude, altitude; +double longitude, latitude, av_unused(altitude); const char *key = "location"; if (len < 4 + 2 + 1 + 1 + 4 + 4 + 4) Why not just remove the variable altogether and do an avio_rb32() or avio_skip() after latitude? Assuming avio_rb32() is needed at all to advance the pb pointer, that is. It would remove an unnecessary division. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 5/5] build: Add define for SIMD extensions requiring 16-byte aligned buffers
On 21/07/14 6:21 PM, Diego Biurrun wrote: --- I'm slightly unsure about this one. MMX does not require 16-byte aligned buffers, nor does PowerPC IIRC, but SSE and AltiVec do, so I believe my solution is closer to the original intention. Please do correct me if I am wrong... configure | 2 ++ libavcodec/utils.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) [...] diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 2abc376..16c30c3 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -189,7 +189,7 @@ int ff_side_data_update_matrix_encoding(AVFrame *frame, return 0; } -#if HAVE_NEON || ARCH_PPC || HAVE_MMX +#if HAVE_SIMD_ALIGN_16 # define STRIDE_ALIGN 16 #else # define STRIDE_ALIGN 8 #if HAVE_NEON || HAVE_ALTIVEC || HAVE_SSE # define STRIDE_ALIGN 16 #else # define STRIDE_ALIGN 8 #endif is simpler than adding another HAVE_ define for this single use. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/3] mem: add av_strndup() for duplicating substrings
On 12/08/14 1:54 PM, Anton Khirnov wrote: --- doc/APIchanges | 3 +++ libavutil/mem.c | 20 libavutil/mem.h | 10 ++ libavutil/version.h | 2 +- 4 files changed, 34 insertions(+), 1 deletion(-) Shouldn't this be in avstring.h/c? ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 07/17] dxva2: Undefine _WIN32_WINNT before defining it
On 15/08/14 5:13 PM, Diego Biurrun wrote: This avoids a number of redefinition warnings. --- libavcodec/dxva2_internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h index f35a076..d50c0ff 100644 --- a/libavcodec/dxva2_internal.h +++ b/libavcodec/dxva2_internal.h @@ -23,6 +23,7 @@ #ifndef AVCODEC_DXVA_INTERNAL_H #define AVCODEC_DXVA_INTERNAL_H +#undef _WIN32_WINNT #define _WIN32_WINNT 0x0600 #define COBJMACROS _WIN32_WINNT may already be defined with a value higher than 0x0600 (for example, when targeting Win7 or Win8), and this would be forcing it to a lower value. In practice, and as far as libavcodec's DXVA2 support goes, there's probably no difference, but the more correct thing to do would be to check if it's already defined, and then only redefine it if it's lower than 0x0600. For that matter, dxva2_internal.h includes dxva2.h, a header that also tries to define _WIN32_WINNT. It would be best to have all this only in dxva2.h to reduce code duplication. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 5/6] lavc: Add private API to manipulate AVPacketList
On 31/08/14 4:24 PM, Luca Barbato wrote: --- libavcodec/avcodec.h | 5 + libavcodec/avpacket.c | 56 ++ libavcodec/internal.h | 36 libavformat/avformat.h | 6 -- 4 files changed, 97 insertions(+), 6 deletions(-) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 270c6c8..116496f 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -1019,6 +1019,11 @@ typedef struct AVPacket { #define AV_PKT_FLAG_KEY 0x0001 ///< The packet contains a keyframe #define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted +typedef struct AVPacketList { +AVPacket pkt; +struct AVPacketList *next; +} AVPacketList; + enum AVSideDataParamChangeFlags { AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT = 0x0001, AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT = 0x0002, diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c index 25eabdb..bea12df 100644 --- a/libavcodec/avpacket.c +++ b/libavcodec/avpacket.c @@ -26,6 +26,7 @@ #include "libavutil/internal.h" #include "libavutil/mathematics.h" #include "libavutil/mem.h" +#include "internal.h" #include "avcodec.h" #if FF_API_DESTRUCT_PACKET @@ -393,3 +394,58 @@ void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb) if (pkt->convergence_duration > 0) pkt->convergence_duration = av_rescale_q(pkt->convergence_duration, src_tb, dst_tb); } + +int ff_packet_list_put(AVPacketList **head, AVPacketList **tail, + AVPacket *pkt) Shouldn't these use the avpriv_ prefix instead of ff_ (in all three functions)? lavf can make good use of all this, and I think your original patchset did as much. Thanks for resurrecting this, for that matter. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] jpeg2000: split off inverse MCT decoding as Jpeg2000DSP
This makes the addition of arch optimized functions easier. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/Makefile | 2 +- libavcodec/jpeg2000dec.c | 72 +-- libavcodec/jpeg2000dsp.c | 98 libavcodec/jpeg2000dsp.h | 35 + libavcodec/jpeg2000dwt.h | 3 +- 5 files changed, 154 insertions(+), 56 deletions(-) create mode 100644 libavcodec/jpeg2000dsp.c create mode 100644 libavcodec/jpeg2000dsp.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 178b61e..69b92b6 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -229,7 +229,7 @@ OBJS-$(CONFIG_INDEO4_DECODER) += indeo4.o ivi_common.o ivi_dsp.o OBJS-$(CONFIG_INDEO5_DECODER) += indeo5.o ivi_common.o ivi_dsp.o OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER) += dpcm.o OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o -OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o \ +OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o jpeg2000dsp.o \ jpeg2000dwt.o mqcdec.o mqc.o OBJS-$(CONFIG_JPEGLS_DECODER) += jpeglsdec.o jpegls.o OBJS-$(CONFIG_JPEGLS_ENCODER) += jpeglsenc.o jpegls.o diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index aed9b2b..5135297 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -35,6 +35,7 @@ #include internal.h #include thread.h #include jpeg2000.h +#include jpeg2000dsp.h #define JP2_SIG_TYPE0x6A502020 #define JP2_SIG_VALUE 0x0D0A870A @@ -85,6 +86,7 @@ typedef struct Jpeg2000DecoderContext { int16_t curtileno; Jpeg2000Tile*tile; +Jpeg2000DSPContext dsp; /*options parameters*/ int reduction_factor; @@ -1041,69 +1043,21 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk, } } -/* Inverse ICT parameters in float and integer. 
- * int value = (float value) * (116) */ -static const float f_ict_params[4] = { -1.402f, -0.34413f, -0.71414f, -1.772f -}; -static const int i_ict_params[4] = { - 91881, - 22553, - 46802, -116130 -}; - -static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile) +static inline void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile) { int i, csize = 1; -int32_t *src[3], i0, i1, i2; -float *srcf[3], i0f, i1f, i2f; +void *src[3]; for (i = 0; i 3; i++) if (tile-codsty[0].transform == FF_DWT97) -srcf[i] = tile-comp[i].f_data; +src[i] = tile-comp[i].f_data; else -src [i] = tile-comp[i].i_data; +src[i] = tile-comp[i].i_data; for (i = 0; i 2; i++) csize *= tile-comp[0].coord[i][1] - tile-comp[0].coord[i][0]; -switch (tile-codsty[0].transform) { -case FF_DWT97: -for (i = 0; i csize; i++) { -i0f = *srcf[0] + (f_ict_params[0] * *srcf[2]); -i1f = *srcf[0] - (f_ict_params[1] * *srcf[1]) - - (f_ict_params[2] * *srcf[2]); -i2f = *srcf[0] + (f_ict_params[3] * *srcf[1]); -*srcf[0]++ = i0f; -*srcf[1]++ = i1f; -*srcf[2]++ = i2f; -} -break; -case FF_DWT97_INT: -for (i = 0; i csize; i++) { -i0 = *src[0] + (((i_ict_params[0] * *src[2]) + (1 15)) 16); -i1 = *src[0] - (((i_ict_params[1] * *src[1]) + (1 15)) 16) - - (((i_ict_params[2] * *src[2]) + (1 15)) 16); -i2 = *src[0] + (((i_ict_params[3] * *src[1]) + (1 15)) 16); -*src[0]++ = i0; -*src[1]++ = i1; -*src[2]++ = i2; -} -break; -case FF_DWT53: -for (i = 0; i csize; i++) { -i1 = *src[0] - (*src[2] + *src[1] 2); -i0 = i1 + *src[2]; -i2 = i1 + *src[1]; -*src[0]++ = i0; -*src[1]++ = i1; -*src[2]++ = i2; -} -break; -} + +s-dsp.mct_decode[tile-codsty[0].transform](src[0], src[1], src[2], csize); } static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, @@ -1406,6 +1360,15 @@ static int jp2_find_codestream(Jpeg2000DecoderContext *s) return 0; } +static av_cold int jpeg2000_decode_init(AVCodecContext *avctx) +{ +Jpeg2000DecoderContext *s = avctx-priv_data; + +ff_jpeg2000dsp_init(s-dsp); + +return 0; +} + 
static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt) { @@ -1510,6 +1473,7 @@ AVCodec ff_jpeg2000_decoder = { .capabilities = CODEC_CAP_FRAME_THREADS, .priv_data_size = sizeof(Jpeg2000DecoderContext), .init_static_data = jpeg2000_init_static_data, +.init = jpeg2000_decode_init, .decode = jpeg2000_decode_frame
[libav-devel] [PATCH] jpeg2000: split off inverse MCT as Jpeg2000DSP
This makes the addition of arch optimized functions easier. Signed-off-by: James Almer jamr...@gmail.com --- Now with proper names for the dsp functions, Irreversible MCT (ICT) and Reversible MCT (RCT) respectively. No other changes. libavcodec/Makefile | 2 +- libavcodec/jpeg2000dec.c | 72 +-- libavcodec/jpeg2000dsp.c | 98 libavcodec/jpeg2000dsp.h | 35 + libavcodec/jpeg2000dwt.h | 3 +- 5 files changed, 154 insertions(+), 56 deletions(-) create mode 100644 libavcodec/jpeg2000dsp.c create mode 100644 libavcodec/jpeg2000dsp.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 178b61e..69b92b6 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -229,7 +229,7 @@ OBJS-$(CONFIG_INDEO4_DECODER) += indeo4.o ivi_common.o ivi_dsp.o OBJS-$(CONFIG_INDEO5_DECODER) += indeo5.o ivi_common.o ivi_dsp.o OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER) += dpcm.o OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o -OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o \ +OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o jpeg2000dsp.o \ jpeg2000dwt.o mqcdec.o mqc.o OBJS-$(CONFIG_JPEGLS_DECODER) += jpeglsdec.o jpegls.o OBJS-$(CONFIG_JPEGLS_ENCODER) += jpeglsenc.o jpegls.o diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index aed9b2b..5135297 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -35,6 +35,7 @@ #include internal.h #include thread.h #include jpeg2000.h +#include jpeg2000dsp.h #define JP2_SIG_TYPE0x6A502020 #define JP2_SIG_VALUE 0x0D0A870A @@ -85,6 +86,7 @@ typedef struct Jpeg2000DecoderContext { int16_t curtileno; Jpeg2000Tile*tile; +Jpeg2000DSPContext dsp; /*options parameters*/ int reduction_factor; @@ -1041,69 +1043,21 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk, } } -/* Inverse ICT parameters in float and integer. 
- * int value = (float value) * (116) */ -static const float f_ict_params[4] = { -1.402f, -0.34413f, -0.71414f, -1.772f -}; -static const int i_ict_params[4] = { - 91881, - 22553, - 46802, -116130 -}; - -static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile) +static inline void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile) { int i, csize = 1; -int32_t *src[3], i0, i1, i2; -float *srcf[3], i0f, i1f, i2f; +void *src[3]; for (i = 0; i 3; i++) if (tile-codsty[0].transform == FF_DWT97) -srcf[i] = tile-comp[i].f_data; +src[i] = tile-comp[i].f_data; else -src [i] = tile-comp[i].i_data; +src[i] = tile-comp[i].i_data; for (i = 0; i 2; i++) csize *= tile-comp[0].coord[i][1] - tile-comp[0].coord[i][0]; -switch (tile-codsty[0].transform) { -case FF_DWT97: -for (i = 0; i csize; i++) { -i0f = *srcf[0] + (f_ict_params[0] * *srcf[2]); -i1f = *srcf[0] - (f_ict_params[1] * *srcf[1]) - - (f_ict_params[2] * *srcf[2]); -i2f = *srcf[0] + (f_ict_params[3] * *srcf[1]); -*srcf[0]++ = i0f; -*srcf[1]++ = i1f; -*srcf[2]++ = i2f; -} -break; -case FF_DWT97_INT: -for (i = 0; i csize; i++) { -i0 = *src[0] + (((i_ict_params[0] * *src[2]) + (1 15)) 16); -i1 = *src[0] - (((i_ict_params[1] * *src[1]) + (1 15)) 16) - - (((i_ict_params[2] * *src[2]) + (1 15)) 16); -i2 = *src[0] + (((i_ict_params[3] * *src[1]) + (1 15)) 16); -*src[0]++ = i0; -*src[1]++ = i1; -*src[2]++ = i2; -} -break; -case FF_DWT53: -for (i = 0; i csize; i++) { -i1 = *src[0] - (*src[2] + *src[1] 2); -i0 = i1 + *src[2]; -i2 = i1 + *src[1]; -*src[0]++ = i0; -*src[1]++ = i1; -*src[2]++ = i2; -} -break; -} + +s-dsp.mct_decode[tile-codsty[0].transform](src[0], src[1], src[2], csize); } static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, @@ -1406,6 +1360,15 @@ static int jp2_find_codestream(Jpeg2000DecoderContext *s) return 0; } +static av_cold int jpeg2000_decode_init(AVCodecContext *avctx) +{ +Jpeg2000DecoderContext *s = avctx-priv_data; + +ff_jpeg2000dsp_init(s-dsp); + +return 0; +} + 
static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt) { @@ -1510,6 +1473,7 @@ AVCodec ff_jpeg2000_decoder = { .capabilities = CODEC_CAP_FRAME_THREADS, .priv_data_size = sizeof(Jpeg2000DecoderContext), .init_static_data
[libav-devel] [PATCH 1/2] compat/w32pthreads: use the CONDITION_VARIABLE typedef if available
This silences warnings about passing arguments from incompatible pointer type when targeting Windows Vista or newer. Signed-off-by: James Almer jamr...@gmail.com --- Only tested with Mingw-w64 v3. Can someone test MSVC or ICL? I know the former defines the struct, so the configure check should succeed on that one at least. compat/w32pthreads.h | 23 +-- configure| 2 ++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h index 2a7f323..2642661 100644 --- a/compat/w32pthreads.h +++ b/compat/w32pthreads.h @@ -55,12 +55,15 @@ typedef struct pthread_t { * not mutexes */ typedef CRITICAL_SECTION pthread_mutex_t; -/* This is the CONDITIONAL_VARIABLE typedef for using Window's native - * conditional variables on kernels 6.0+. - * MinGW does not currently have this typedef. */ +/* This is the CONDITION_VARIABLE typedef for using Window's native + * conditional variables on kernels 6.0+. */ +#if HAVE_CONDITION_VARIABLE_PTR +typedef CONDITION_VARIABLE pthread_cond_t; +#else typedef struct pthread_cond_t { -void *ptr; +void *Ptr; } pthread_cond_t; +#endif /* function pointers to conditional variable API on windows 6.0+ kernels */ #if _WIN32_WINNT 0x0600 @@ -159,7 +162,7 @@ static av_unused int pthread_cond_init(pthread_cond_t *cond, const void *unused_ win32_cond = av_mallocz(sizeof(win32_cond_t)); if (!win32_cond) return ENOMEM; -cond-ptr = win32_cond; +cond-Ptr = win32_cond; win32_cond-semaphore = CreateSemaphore(NULL, 0, 0x7fff, NULL); if (!win32_cond-semaphore) return ENOMEM; @@ -174,7 +177,7 @@ static av_unused int pthread_cond_init(pthread_cond_t *cond, const void *unused_ static av_unused void pthread_cond_destroy(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; /* native condition variables do not destroy */ if (cond_init) return; @@ -185,12 +188,12 @@ static av_unused void pthread_cond_destroy(pthread_cond_t *cond) 
pthread_mutex_destroy(win32_cond-mtx_waiter_count); pthread_mutex_destroy(win32_cond-mtx_broadcast); av_freep(win32_cond); -cond-ptr = NULL; +cond-Ptr = NULL; } static av_unused void pthread_cond_broadcast(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int have_waiter; if (cond_broadcast) { @@ -221,7 +224,7 @@ static av_unused void pthread_cond_broadcast(pthread_cond_t *cond) static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int last_waiter; if (cond_wait) { cond_wait(cond, mutex, INFINITE); @@ -253,7 +256,7 @@ static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mu static av_unused void pthread_cond_signal(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int have_waiter; if (cond_signal) { cond_signal(cond); diff --git a/configure b/configure index 06a2d4e..cda463d 100755 --- a/configure +++ b/configure @@ -1767,6 +1767,7 @@ TOOLCHAIN_FEATURES= TYPES_LIST= +CONDITION_VARIABLE_Ptr socklen_t struct_addrinfo struct_group_source_req @@ -4719,6 +4720,7 @@ check_func_headers windows.h PeekNamedPipe check_func_headers windows.h SetConsoleTextAttribute check_func_headers windows.h Sleep check_func_headers windows.h VirtualAlloc +check_struct windows.h CONDITION_VARIABLE Ptr check_func_headers glob.h glob enabled xlib check_func_headers X11/Xlib.h X11/extensions/Xvlib.h XvGetPortAttribute -lXv -lX11 -lXext -- 2.0.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows
Wrap the function calls in a similar fashion to how it's being done with the critical section API. Signed-off-by: James Almer jamr...@gmail.com --- compat/w32pthreads.h | 64 +++- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h index 2642661..693ef51 100644 --- a/compat/w32pthreads.h +++ b/compat/w32pthreads.h @@ -65,32 +65,6 @@ typedef struct pthread_cond_t { } pthread_cond_t; #endif -/* function pointers to conditional variable API on windows 6.0+ kernels */ -#if _WIN32_WINNT 0x0600 -static void (WINAPI *cond_broadcast)(pthread_cond_t *cond); -static void (WINAPI *cond_init)(pthread_cond_t *cond); -static void (WINAPI *cond_signal)(pthread_cond_t *cond); -static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex, -DWORD milliseconds); -#else -#define cond_init InitializeConditionVariable -#define cond_broadcast WakeAllConditionVariable -#define cond_signalWakeConditionVariable -#define cond_wait SleepConditionVariableCS - -#define CreateEvent(a, reset, init, name) \ -CreateEventEx(a, name, \ - (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \ - (init ? CREATE_EVENT_INITIAL_SET : 0),\ - EVENT_ALL_ACCESS) -// CreateSemaphoreExA seems to be desktop-only, but as long as we don't -// use named semaphores, it doesn't matter if we use the W version. 
-#define CreateSemaphore(a, b, c, d) \ -CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS) -#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0) -#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE) -#endif - static av_unused unsigned __stdcall attribute_align_arg win32thread_worker(void *arg) { pthread_t *h = arg; @@ -139,6 +113,36 @@ static inline int pthread_mutex_unlock(pthread_mutex_t *m) return 0; } +#if _WIN32_WINNT = 0x0600 +static inline int pthread_cond_init(pthread_cond_t *cond, const void *unused_attr) +{ +InitializeConditionVariable(cond); +return 0; +} + +/* native condition variables do not destroy */ +static inline void pthread_cond_destroy(pthread_cond_t *cond) +{ +return; +} + +static inline void pthread_cond_broadcast(pthread_cond_t *cond) +{ +WakeAllConditionVariable(cond); +} + +static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ +SleepConditionVariableCS(cond, mutex, INFINITE); +return 0; +} + +static inline void pthread_cond_signal(pthread_cond_t *cond) +{ +WakeConditionVariable(cond); +} + +#else // _WIN32_WINNT 0x0600 /* for pre-Windows 6.0 platforms we need to define and use our own condition * variable and api */ typedef struct win32_cond_t { @@ -150,6 +154,13 @@ typedef struct win32_cond_t { volatile int is_broadcast; } win32_cond_t; +/* function pointers to conditional variable API on windows 6.0+ kernels */ +static void (WINAPI *cond_broadcast)(pthread_cond_t *cond); +static void (WINAPI *cond_init)(pthread_cond_t *cond); +static void (WINAPI *cond_signal)(pthread_cond_t *cond); +static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex, +DWORD milliseconds); + static av_unused int pthread_cond_init(pthread_cond_t *cond, const void *unused_attr) { win32_cond_t *win32_cond = NULL; @@ -278,6 +289,7 @@ static av_unused void pthread_cond_signal(pthread_cond_t *cond) pthread_mutex_unlock(win32_cond-mtx_broadcast); } +#endif static av_unused void 
w32thread_init(void) { -- 2.0.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] compat/w32pthreads: use the CONDITION_VARIABLE typedef if available
This silences warnings about passing arguments from incompatible pointer type when targeting Windows Vista or newer. Signed-off-by: James Almer jamr...@gmail.com --- Sent the wrong version earlier, my bad. The request for testing i made before still stands. compat/w32pthreads.h | 23 +-- configure| 2 ++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h index d0b90e8..b905a95 100644 --- a/compat/w32pthreads.h +++ b/compat/w32pthreads.h @@ -54,12 +54,15 @@ typedef struct pthread_t { * not mutexes */ typedef CRITICAL_SECTION pthread_mutex_t; -/* This is the CONDITIONAL_VARIABLE typedef for using Window's native - * conditional variables on kernels 6.0+. - * MinGW does not currently have this typedef. */ +/* This is the CONDITION_VARIABLE typedef for using Window's native + * conditional variables on kernels 6.0+. */ +#if HAVE_CONDITION_VARIABLE_PTR +typedef CONDITION_VARIABLE pthread_cond_t; +#else typedef struct pthread_cond_t { -void *ptr; +void *Ptr; } pthread_cond_t; +#endif /* function pointers to conditional variable API on windows 6.0+ kernels */ #if _WIN32_WINNT 0x0600 @@ -158,7 +161,7 @@ static av_unused void pthread_cond_init(pthread_cond_t *cond, const void *unused win32_cond = av_mallocz(sizeof(win32_cond_t)); if (!win32_cond) return; -cond-ptr = win32_cond; +cond-Ptr = win32_cond; win32_cond-semaphore = CreateSemaphore(NULL, 0, 0x7fff, NULL); if (!win32_cond-semaphore) return; @@ -172,7 +175,7 @@ static av_unused void pthread_cond_init(pthread_cond_t *cond, const void *unused static av_unused void pthread_cond_destroy(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; /* native condition variables do not destroy */ if (cond_init) return; @@ -183,12 +186,12 @@ static av_unused void pthread_cond_destroy(pthread_cond_t *cond) pthread_mutex_destroy(win32_cond-mtx_waiter_count); pthread_mutex_destroy(win32_cond-mtx_broadcast); av_freep(win32_cond); 
-cond-ptr = NULL; +cond-Ptr = NULL; } static av_unused void pthread_cond_broadcast(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int have_waiter; if (cond_broadcast) { @@ -219,7 +222,7 @@ static av_unused void pthread_cond_broadcast(pthread_cond_t *cond) static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int last_waiter; if (cond_wait) { cond_wait(cond, mutex, INFINITE); @@ -251,7 +254,7 @@ static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mu static av_unused void pthread_cond_signal(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int have_waiter; if (cond_signal) { cond_signal(cond); diff --git a/configure b/configure index d87871e..a82bef7 100755 --- a/configure +++ b/configure @@ -1498,6 +1498,7 @@ TOOLCHAIN_FEATURES= TYPES_LIST= +CONDITION_VARIABLE_Ptr socklen_t struct_addrinfo struct_group_source_req @@ -4088,6 +4089,7 @@ check_func_headers windows.h MapViewOfFile check_func_headers windows.h SetConsoleTextAttribute check_func_headers windows.h Sleep check_func_headers windows.h VirtualAlloc +check_struct windows.h CONDITION_VARIABLE Ptr check_header direct.h check_header dlfcn.h -- 2.0.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows
Wrap the function calls in a similar fashion to how it's being done with the critical section API. Signed-off-by: James Almer jamr...@gmail.com --- compat/w32pthreads.h | 63 ++-- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h index b905a95..e586ecb 100644 --- a/compat/w32pthreads.h +++ b/compat/w32pthreads.h @@ -64,32 +64,6 @@ typedef struct pthread_cond_t { } pthread_cond_t; #endif -/* function pointers to conditional variable API on windows 6.0+ kernels */ -#if _WIN32_WINNT 0x0600 -static void (WINAPI *cond_broadcast)(pthread_cond_t *cond); -static void (WINAPI *cond_init)(pthread_cond_t *cond); -static void (WINAPI *cond_signal)(pthread_cond_t *cond); -static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex, -DWORD milliseconds); -#else -#define cond_init InitializeConditionVariable -#define cond_broadcast WakeAllConditionVariable -#define cond_signalWakeConditionVariable -#define cond_wait SleepConditionVariableCS - -#define CreateEvent(a, reset, init, name) \ -CreateEventEx(a, name, \ - (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \ - (init ? CREATE_EVENT_INITIAL_SET : 0),\ - EVENT_ALL_ACCESS) -// CreateSemaphoreExA seems to be desktop-only, but as long as we don't -// use named semaphores, it doesn't matter if we use the W version. 
-#define CreateSemaphore(a, b, c, d) \ -CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS) -#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0) -#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE) -#endif - static av_unused unsigned __stdcall attribute_align_arg win32thread_worker(void *arg) { pthread_t *h = arg; @@ -138,6 +112,35 @@ static inline int pthread_mutex_unlock(pthread_mutex_t *m) return 0; } +#if _WIN32_WINNT = 0x0600 +static inline void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr) +{ +InitializeConditionVariable(cond); +} + +/* native condition variables do not destroy */ +static inline void pthread_cond_destroy(pthread_cond_t *cond) +{ +return; +} + +static inline void pthread_cond_broadcast(pthread_cond_t *cond) +{ +WakeAllConditionVariable(cond); +} + +static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ +SleepConditionVariableCS(cond, mutex, INFINITE); +return 0; +} + +static inline void pthread_cond_signal(pthread_cond_t *cond) +{ +WakeConditionVariable(cond); +} + +#else // _WIN32_WINNT 0x0600 /* for pre-Windows 6.0 platforms we need to define and use our own condition * variable and api */ typedef struct win32_cond_t { @@ -149,6 +152,13 @@ typedef struct win32_cond_t { volatile int is_broadcast; } win32_cond_t; +/* function pointers to conditional variable API on windows 6.0+ kernels */ +static void (WINAPI *cond_broadcast)(pthread_cond_t *cond); +static void (WINAPI *cond_init)(pthread_cond_t *cond); +static void (WINAPI *cond_signal)(pthread_cond_t *cond); +static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex, +DWORD milliseconds); + static av_unused void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr) { win32_cond_t *win32_cond = NULL; @@ -276,6 +286,7 @@ static av_unused void pthread_cond_signal(pthread_cond_t *cond) pthread_mutex_unlock(win32_cond-mtx_broadcast); } +#endif static av_unused void 
w32thread_init(void) { -- 2.0.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows
On 09/10/14 5:03 AM, Martin Storsjö wrote: On Wed, 8 Oct 2014, James Almer wrote: Wrap the function calls in a similar fashion to how it's being done with the critical section API. Signed-off-by: James Almer jamr...@gmail.com --- compat/w32pthreads.h | 63 ++-- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h index b905a95..e586ecb 100644 --- a/compat/w32pthreads.h +++ b/compat/w32pthreads.h @@ -64,32 +64,6 @@ typedef struct pthread_cond_t { } pthread_cond_t; #endif -/* function pointers to conditional variable API on windows 6.0+ kernels */ -#if _WIN32_WINNT 0x0600 -static void (WINAPI *cond_broadcast)(pthread_cond_t *cond); -static void (WINAPI *cond_init)(pthread_cond_t *cond); -static void (WINAPI *cond_signal)(pthread_cond_t *cond); -static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex, -DWORD milliseconds); -#else -#define cond_init InitializeConditionVariable -#define cond_broadcast WakeAllConditionVariable -#define cond_signalWakeConditionVariable -#define cond_wait SleepConditionVariableCS - -#define CreateEvent(a, reset, init, name) \ -CreateEventEx(a, name, \ - (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \ - (init ? CREATE_EVENT_INITIAL_SET : 0),\ - EVENT_ALL_ACCESS) -// CreateSemaphoreExA seems to be desktop-only, but as long as we don't -// use named semaphores, it doesn't matter if we use the W version. -#define CreateSemaphore(a, b, c, d) \ -CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS) -#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0) -#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE) -#endif - Where did the CreateEvent/CreateSemaphore/InitializeCriticalSection/WaitForSingleObject definitions go here? When targeting desktop windows they don't matter (since the old functions still exist), but when targeting WinRT/WinPhone, the old functions are no longer available. 
You're right about InitializeCriticalSection and WaitForSingleObject (I somehow missed those), but the redefinitions of CreateEvent and CreateSemaphore are not needed anymore, since they will now be used only for the non-native version of the condition variable API, which is only compiled when _WIN32_WINNT < 0x0600. I'll send a patch to put the former two back in place. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows
Wrap the function calls in a similar fashion to how it's being done with the critical section API. Signed-off-by: James Almer jamr...@gmail.com --- compat/w32pthreads.h | 60 +--- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h index f8eb0c8..3748289 100644 --- a/compat/w32pthreads.h +++ b/compat/w32pthreads.h @@ -64,28 +64,7 @@ typedef struct pthread_cond_t { } pthread_cond_t; #endif -/* function pointers to conditional variable API on windows 6.0+ kernels */ -#if _WIN32_WINNT 0x0600 -static void (WINAPI *cond_broadcast)(pthread_cond_t *cond); -static void (WINAPI *cond_init)(pthread_cond_t *cond); -static void (WINAPI *cond_signal)(pthread_cond_t *cond); -static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex, -DWORD milliseconds); -#else -#define cond_init InitializeConditionVariable -#define cond_broadcast WakeAllConditionVariable -#define cond_signalWakeConditionVariable -#define cond_wait SleepConditionVariableCS - -#define CreateEvent(a, reset, init, name) \ -CreateEventEx(a, name, \ - (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \ - (init ? CREATE_EVENT_INITIAL_SET : 0),\ - EVENT_ALL_ACCESS) -// CreateSemaphoreExA seems to be desktop-only, but as long as we don't -// use named semaphores, it doesn't matter if we use the W version. 
-#define CreateSemaphore(a, b, c, d) \ -CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS) +#if _WIN32_WINNT = 0x0600 #define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0) #define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE) #endif @@ -138,6 +117,35 @@ static inline int pthread_mutex_unlock(pthread_mutex_t *m) return 0; } +#if _WIN32_WINNT = 0x0600 +static inline void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr) +{ +InitializeConditionVariable(cond); +} + +/* native condition variables do not destroy */ +static inline void pthread_cond_destroy(pthread_cond_t *cond) +{ +return; +} + +static inline void pthread_cond_broadcast(pthread_cond_t *cond) +{ +WakeAllConditionVariable(cond); +} + +static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ +SleepConditionVariableCS(cond, mutex, INFINITE); +return 0; +} + +static inline void pthread_cond_signal(pthread_cond_t *cond) +{ +WakeConditionVariable(cond); +} + +#else // _WIN32_WINNT 0x0600 /* for pre-Windows 6.0 platforms we need to define and use our own condition * variable and api */ typedef struct win32_cond_t { @@ -149,6 +157,13 @@ typedef struct win32_cond_t { volatile int is_broadcast; } win32_cond_t; +/* function pointers to conditional variable API on windows 6.0+ kernels */ +static void (WINAPI *cond_broadcast)(pthread_cond_t *cond); +static void (WINAPI *cond_init)(pthread_cond_t *cond); +static void (WINAPI *cond_signal)(pthread_cond_t *cond); +static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex, +DWORD milliseconds); + static av_unused void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr) { win32_cond_t *win32_cond = NULL; @@ -276,6 +291,7 @@ static av_unused void pthread_cond_signal(pthread_cond_t *cond) pthread_mutex_unlock(win32_cond-mtx_broadcast); } +#endif static av_unused void w32thread_init(void) { -- 2.0.4 ___ libav-devel mailing list libav-devel@libav.org 
https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] compat/w32pthreads: use the CONDITION_VARIABLE typedef if available
This silences warnings about passing arguments from incompatible pointer type when targeting Windows Vista or newer. Signed-off-by: James Almer jamr...@gmail.com --- compat/w32pthreads.h | 23 +-- configure| 2 ++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h index d0b90e8..f8eb0c8 100644 --- a/compat/w32pthreads.h +++ b/compat/w32pthreads.h @@ -54,12 +54,15 @@ typedef struct pthread_t { * not mutexes */ typedef CRITICAL_SECTION pthread_mutex_t; -/* This is the CONDITIONAL_VARIABLE typedef for using Window's native - * conditional variables on kernels 6.0+. - * MinGW does not currently have this typedef. */ +/* This is the CONDITION_VARIABLE typedef for using Windows' native + * conditional variables on kernels 6.0+. */ +#if HAVE_CONDITION_VARIABLE_PTR +typedef CONDITION_VARIABLE pthread_cond_t; +#else typedef struct pthread_cond_t { -void *ptr; +void *Ptr; } pthread_cond_t; +#endif /* function pointers to conditional variable API on windows 6.0+ kernels */ #if _WIN32_WINNT 0x0600 @@ -158,7 +161,7 @@ static av_unused void pthread_cond_init(pthread_cond_t *cond, const void *unused win32_cond = av_mallocz(sizeof(win32_cond_t)); if (!win32_cond) return; -cond-ptr = win32_cond; +cond-Ptr = win32_cond; win32_cond-semaphore = CreateSemaphore(NULL, 0, 0x7fff, NULL); if (!win32_cond-semaphore) return; @@ -172,7 +175,7 @@ static av_unused void pthread_cond_init(pthread_cond_t *cond, const void *unused static av_unused void pthread_cond_destroy(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; /* native condition variables do not destroy */ if (cond_init) return; @@ -183,12 +186,12 @@ static av_unused void pthread_cond_destroy(pthread_cond_t *cond) pthread_mutex_destroy(win32_cond-mtx_waiter_count); pthread_mutex_destroy(win32_cond-mtx_broadcast); av_freep(win32_cond); -cond-ptr = NULL; +cond-Ptr = NULL; } static av_unused void pthread_cond_broadcast(pthread_cond_t 
*cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int have_waiter; if (cond_broadcast) { @@ -219,7 +222,7 @@ static av_unused void pthread_cond_broadcast(pthread_cond_t *cond) static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int last_waiter; if (cond_wait) { cond_wait(cond, mutex, INFINITE); @@ -251,7 +254,7 @@ static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mu static av_unused void pthread_cond_signal(pthread_cond_t *cond) { -win32_cond_t *win32_cond = cond-ptr; +win32_cond_t *win32_cond = cond-Ptr; int have_waiter; if (cond_signal) { cond_signal(cond); diff --git a/configure b/configure index d87871e..a82bef7 100755 --- a/configure +++ b/configure @@ -1498,6 +1498,7 @@ TOOLCHAIN_FEATURES= TYPES_LIST= +CONDITION_VARIABLE_Ptr socklen_t struct_addrinfo struct_group_source_req @@ -4088,6 +4089,7 @@ check_func_headers windows.h MapViewOfFile check_func_headers windows.h SetConsoleTextAttribute check_func_headers windows.h Sleep check_func_headers windows.h VirtualAlloc +check_struct windows.h CONDITION_VARIABLE Ptr check_header direct.h check_header dlfcn.h -- 2.0.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/4] lavc: add a public API for parsing vorbis packets.
On 29/10/14 12:46 PM, Anton Khirnov wrote: It is required by (at least) the ogg demuxer. Mark the current semi-public apriv API for removal. --- doc/APIchanges | 3 +++ libavcodec/Makefile| 12 ++ libavcodec/version.h | 4 ++-- libavcodec/vorbis_parse.h | 58 ++ libavcodec/vorbis_parser.c | 52 - libavcodec/vorbis_parser.h | 7 -- 6 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 libavcodec/vorbis_parse.h Maybe call the internal header vorbis_parser_internal.h, and the public one vorbis_parser.h? That's how it was done for dv_profile, and is less confusing. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] vf_interlace: x86: improve asm performance
On 24/11/14 8:05 PM, Vittorio Giovara wrote: On Mon, Nov 24, 2014 at 5:58 PM, Henrik Gramner hen...@gramner.com wrote: +mova m2, [r2+r1] +mova m3, [r2+r1+mmsize] +pxor m2, m6 +pxor m3, m6 pxor m2, m6, [r2+r1] pxor m3, m6, [r2+r1+mmsize] Avoids two moves in AVX, otherwise LGTM. queued, thanks for the suggestion Looking at the committed code, you followed Henrik's suggestion about the pxor lines, but you didn't remove the mova lines, which are now redundant. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] Fwd: Bug#771126: libav: contains non-DFSG image file tests/lena.pnm
On 27/11/14 2:43 PM, Reinhard Tartler wrote: Hi, it seems that tests/lena.pnm is not really redistributable. I'm proposing to replace it with an image I've taken this summer. The patch itself is too large to post it here, which is why I've uploaded it to https://github.com/libav/libav/pull/17. Luca seems okay with it, koda suggested to ask here anyways. OK to push to master? any suggestions for the commit message? Reinhard Isn't it cleaner to just upload lena to the fate suite instead of having to update the reference files for all the relevant tests? ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] Fwd: Bug#771126: libav: contains non-DFSG image file tests/lena.pnm
On 27/11/14 3:03 PM, Reinhard Tartler wrote: On Thu, Nov 27, 2014 at 12:59 PM, James Almer jamr...@gmail.com wrote: On 27/11/14 2:43 PM, Reinhard Tartler wrote: Hi, it seems that tests/lena.pnm is not really redistributable. I'm proposing to replace it with an image I've taken this summer. The patch itself is too large to post it here, which is why I've uploaded it to https://github.com/libav/libav/pull/17. Luca seems okay with it, koda suggested to ask here anyways. OK to push to master? any suggestions for the commit message? Reinhard Isn't it cleaner to just upload lena to the fate suite instead of having to update the reference files for all the relevant tests? How is that cleaner than replacing non-redistributable files with a perfectly free one? Cleaner commit. It would be a matter of changing the path of lena.pnm to the samples directory. A change of a couple of lines. Moving it to fate ensures that no tests can be executed without fate (currently, there are some). vsynth and asynth dependent tests would still run without the fate suite. Anyway, just my 2 cents. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] oggdec: add support for VP8 demuxing
On 12/12/14 5:57 PM, Vittorio Giovara wrote: From: James Almer jamr...@gmail.com Signed-off-by: James Almer jamr...@gmail.com Signed-off-by: Michael Niedermayer michae...@gmx.at Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com --- Changelog | 1 + libavformat/Makefile | 1 + libavformat/oggdec.c | 1 + libavformat/oggdec.h | 1 + libavformat/oggparsevp8.c | 142 ++ libavformat/version.h | 4 +- 6 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 libavformat/oggparsevp8.c [...] diff --git a/libavformat/oggparsevp8.c b/libavformat/oggparsevp8.c new file mode 100644 index 000..1256bfe --- /dev/null +++ b/libavformat/oggparsevp8.c @@ -0,0 +1,142 @@ +/* + * On2 VP8 parser for Ogg + * Copyright (C) 2013 James Almer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include libavutil/intreadwrite.h + +#include avformat.h +#include internal.h +#include oggdec.h + +#define VP8_HEADER_SIZE 26 + +static int vp8_header(AVFormatContext *s, int idx) +{ +struct ogg *ogg = s-priv_data; +struct ogg_stream *os = ogg-streams + idx; +uint8_t *p = os-buf + os-pstart; +AVStream *st = s-streams[idx]; +AVRational framerate; + +if (os-psize 7 || p[0] != 0x4f) +return 0; + +switch (p[5]){ +case 0x01: +if (os-psize VP8_HEADER_SIZE) { +av_log(s, AV_LOG_ERROR, Invalid OggVP8 header packet); +return AVERROR_INVALIDDATA; +} + +if (p[6] != 1) { +av_log(s, AV_LOG_WARNING, + Unknown OggVP8 version %d.%d\n, p[6], p[7]); +return AVERROR_INVALIDDATA; +} + +st-codec-width= AV_RB16(p + 8); +st-codec-height = AV_RB16(p + 10); +st-sample_aspect_ratio.num = AV_RB24(p + 12); +st-sample_aspect_ratio.den = AV_RB24(p + 15); +framerate.den = AV_RB32(p + 18); +framerate.num = AV_RB32(p + 22); + +avpriv_set_pts_info(st, 64, framerate.num, framerate.den); +st-codec-codec_type = AVMEDIA_TYPE_VIDEO; +st-codec-codec_id = AV_CODEC_ID_VP8; +st-need_parsing = AVSTREAM_PARSE_HEADERS; +break; +case 0x02: +if (p[6] != 0x20) +return AVERROR_INVALIDDATA; +ff_vorbis_comment(s, st-metadata, p + 7, os-psize - 7, 1); ff_vorbis_stream_comment() for consistency with the other parsers. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] oggdec: add support for VP8 demuxing
On 16/12/14 11:58 AM, Vittorio Giovara wrote: From: James Almer jamr...@gmail.com Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com --- Dropped the sign-offs since the file was modified. Addressed Anton's and James' comment. Vittorio Changelog | 1 + libavformat/Makefile | 1 + libavformat/oggdec.c | 1 + libavformat/oggdec.h | 1 + libavformat/oggparsevp8.c | 142 ++ libavformat/version.h | 4 +- 6 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 libavformat/oggparsevp8.c Please undo the change Anton requested. It was correct in the first patch. Check the samples from http://people.freedesktop.org/~slomo/ogg-vp8/ and see the framerate it reports for them. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] oggdec: add support for VP8 demuxing
On 16/12/14 3:05 PM, Vittorio Giovara wrote: On Tue, Dec 16, 2014 at 6:10 PM, James Almer jamr...@gmail.com wrote: On 16/12/14 11:58 AM, Vittorio Giovara wrote: From: James Almer jamr...@gmail.com Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com --- Dropped the sign-offs since the file was modified. Addressed Anton's and James' comment. Vittorio Changelog | 1 + libavformat/Makefile | 1 + libavformat/oggdec.c | 1 + libavformat/oggdec.h | 1 + libavformat/oggparsevp8.c | 142 ++ libavformat/version.h | 4 +- 6 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 libavformat/oggparsevp8.c Please undo the change Anton requested. It was correct in the first patch. Check the samples from http://people.freedesktop.org/~slomo/ogg-vp8/ and see the framerate it reports for them. Thanks for the link. According to the specifications hosted there http://people.freedesktop.org/~slomo/ogg-vp8/ogg-vp8.pdf it looks like numerator and denominator are parsed wrong. your code st-codec-width= AV_RB16(p + 8); st-codec-height = AV_RB16(p + 10); st-sample_aspect_ratio.num = AV_RB24(p + 12); st-sample_aspect_ratio.den = AV_RB24(p + 15); framerate.den = AV_RB32(p + 18); framerate.num = AV_RB32(p + 22); spec code FW 16 Stored frame width. FH 16 Stored frame height. PARN 24 Pixel aspect ratio numerator. PARD 24 Pixel aspect ratio denominator. FPSN 32 Frame rate numerator. FPSD 32 Frame rate denominator So it looks like the change Anton requested was correct and the AV_RB32 need to be inverted, unless I am missing something. James, can you confirm for me please? Yes, that should work as well. On an unrelated note, would it be possible to have a fate test? Thanks Sure, I'll send one after this is committed. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/3] dca: Support for XLL (lossless extension)
On 16/03/15 5:00 AM, Niels Möller wrote: James Almer jamr...@gmail.com writes: Valgrind is complaining about this code (Conditional jump or move depends on uninitialised value error), as seen here https://fate.libav.org/x86_64-linux-gcc-valgrind/20150316044429 Zero initializing the param_state[16] struct from ff_dca_xll_decode_audio() with { { 0 } } fixes it, but it's possible it may instead be hiding the real bug in the code. If I read the code correctly, it looks like params-pancABIT0 is read from the stream for the first segment (seg == 0) only, and used for decoding params-nSamplePart0 samples. And that the latter value ought to be always zero when seg != 0. The logic is a bit complex, and since it many months since I wrote that code, I don't quite remember how it is supposed to work... But I suspect the problem is that the value, which is a loop invariant, is read and tested up-front, even in the case that the loop using it runs for zero iterations. Can you test if the below patch solves the problem? It reads params-pancABIT0 only when it's going to be used. Regards, /Niels diff --git a/libavcodec/dca_xll.c b/libavcodec/dca_xll.c index 0c32d6e..5a558b8 100644 --- a/libavcodec/dca_xll.c +++ b/libavcodec/dca_xll.c @@ -514,8 +514,8 @@ int ff_dca_xll_decode_audio(DCAContext *s, AVFrame *frame) } for (i = 0; i chset-channels; i++) { int param_index = params-seg_type ? 0 : i; -int bits= params-pancABIT0[param_index]; int part0 = params-nSamplPart0[param_index]; +int bits= part0 ? params-pancABIT0[param_index] : 0; int *sample_buf = s-xll_sample_buf + (in_channel + i) * s-xll_smpl_in_seg; Yes, it fixes it on my end. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/3] dca: Support for XLL (lossless extension)
On 13/03/15 12:24 PM, Luca Barbato wrote: On 13/03/15 16:17, Diego Biurrun wrote: From: Niels Möller ni...@lysator.liu.se --- Changes since last round: - XLL disabled by default. - Return error on too many downmix coefficients This has survived Oracle, so it's good to go IMO and will hit the tree very soon, barring last minute comments/objections. Fine for me. lu Valgrind is complaining about this code (Conditional jump or move depends on uninitialised value error), as seen here https://fate.libav.org/x86_64-linux-gcc-valgrind/20150316044429 Zero initializing the param_state[16] struct from ff_dca_xll_decode_audio() with { { 0 } } fixes it, but it's possible it may instead be hiding the real bug in the code. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] atestsrc: Initial implementation
On 12/03/15 1:14 PM, Luca Barbato wrote: On 12/03/15 13:54, Derek Buitenhuis wrote: On 3/11/2015 7:07 PM, Luca Barbato wrote: On top of it, I wasn't aware it exists The feature has been complete and in FFmpeg master for 1.5+ years... I do not think "pretend FFmpeg doesn't exist and never look at it at all LALALAL I CAN'T HEAR YOU" is a valid stance to take as a competitive library. The only ones who suffer are users. Sure would be nice to add more features that are present in FFmpeg, but looks like you and Hendrik missed the purpose of this patch. I want to write a walk through in the form of blogposts, so I need to write something quite simple and possibly improve it incrementally. I did already for the demuxer (that maybe could enjoy a second post with the feedback martin gave) that I know better. Since I do not know so well avfilter, I wanted to see if even this minimal audio source is right before blogging (for the demuxer I blogged and then sent the code to the ml). lu Either write it, blog about it and do not apply the end result to the tree, or write a different simple filter instead. There are surely many filter ideas not yet implemented in either of the two projects that one could write from scratch. "I'm doing it because I want to write a blog about how to write a filter" is not a good reason to create and apply a second implementation that inconveniences the end user. Or alternatively you could, after finishing writing this filter and the blog post, port aevalsrc and cherry pick code from your filter to improve the former. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] tiff: Return more meaningful error codes
On 28/03/15 2:52 PM, Justin Ruggles wrote: On 03/28/2015 01:42 PM, Himangi Saraogi wrote: --- libavcodec/tiffenc.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libavcodec/tiffenc.c b/libavcodec/tiffenc.c index 169360f..46e4207 100644 --- a/libavcodec/tiffenc.c +++ b/libavcodec/tiffenc.c @@ -153,7 +153,8 @@ static int add_entry1(TiffEncoderContext *s, * @param dst Output buffer * @param n Size of input buffer * @param compr Compression method - * @return Number of output bytes. If an output error is encountered, -1 returned + * @return Number of output bytes. If an output error is encountered, a negative + * value corresponding to an AVERROR error code is returned. */ static int encode_strip(TiffEncoderContext *s, const int8_t *src, uint8_t *dst, int n, int compr) @@ -166,14 +167,14 @@ static int encode_strip(TiffEncoderContext *s, const int8_t *src, unsigned long zlen = s-buf_size - (*s-buf - s-buf_start); if (compress(dst, zlen, src, n) != Z_OK) { av_log(s-avctx, AV_LOG_ERROR, Compressing failed\n); -return -1; +return AVERROR_INVALIDDATA; This is an unknown error from an external library, so AVERROR_UNKNOWN should be returned. } return zlen; } #endif case TIFF_RAW: if (check_size(s, n)) -return -1; +return AVERROR(EINVAL); memcpy(dst, src, n); return n; case TIFF_PACKBITS: @@ -182,7 +183,7 @@ static int encode_strip(TiffEncoderContext *s, const int8_t *src, case TIFF_LZW: return ff_lzw_encode(s-lzws, src, n); default: -return -1; +return AVERROR_UNKNOWN; Should be AVERROR_BUG since compression type is an AVOption that has defined bounds. No, this should be AVERROR(EINVAL) because even inside the bounds there are several values for compressions that are not currently supported. i can do avconv -i INPUT -compression_algo 2 OUTPUT and it wouldn't be a bug, it would be an invalid argument. 
} } @@ -291,7 +292,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, default: av_log(s-avctx, AV_LOG_ERROR, This colors format is not supported\n); -return -1; +return AVERROR_INVALIDDATA; This really never should happen in practice, but at any rate the correct error value is AVERROR(EINVAL) because it is an unsupported/invalid field set by the user. } if (s-compr == TIFF_DEFLATE || Thanks, Justin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] Canopus HQ/HQA decoder
On 22/03/15 12:49 PM, Vittorio Giovara wrote: +// AAN IDCT If this isn't already in the tree somewhere and it's generic enough that it can be reused, then it should be shared like faanidct and added to idctdsp. And if it's HQ/HQA specific, it still could be split into a new hqdsp context for potential optimizations. + +#define FIX_1_082 17734 +#define FIX_1_847 30274 +#define FIX_1_414 23170 +#define FIX_2_613 21407 // divided by two to fit the range + +#define IDCTMUL(a, b) ((a) * (b) 16) + +static inline void idct_row(int16_t *blk) +{ +int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmpA; +int tmpB, tmpC, tmpD, tmpE, tmpF, tmp10, tmp11, tmp12, tmp13, tmp14; + +tmp0 = blk[5] - blk[3]; +tmp1 = blk[5] + blk[3]; +tmp2 = blk[1] - blk[7]; +tmp3 = blk[1] + blk[7]; +tmp4 = tmp3 - tmp1; +tmp5 = IDCTMUL(tmp0 + tmp2, FIX_1_847); +tmp6 = IDCTMUL(tmp2,FIX_1_082) - tmp5; +tmp7 = tmp5 - IDCTMUL(tmp0, FIX_2_613) * 2; +tmp8 = tmp3 + tmp1; +tmp9 = tmp7 * 4 - tmp8; +tmpA = IDCTMUL(tmp4, FIX_1_414) * 4 - tmp9; +tmpB = tmp6 * 4 + tmpA; +tmpC = blk[2] + blk[6]; +tmpD = blk[2] - blk[6]; +tmpE = blk[0] - blk[4]; +tmpF = blk[0] + blk[4]; + +tmp10 = IDCTMUL(tmpD, FIX_1_414) * 4 - tmpC; +tmp11 = tmpE - tmp10; +tmp12 = tmpF - tmpC; +tmp13 = tmpE + tmp10; +tmp14 = tmpF + tmpC; + +blk[0] = tmp14 + tmp8; +blk[1] = tmp13 + tmp9; +blk[2] = tmp11 + tmpA; +blk[3] = tmp12 - tmpB; +blk[4] = tmp12 + tmpB; +blk[5] = tmp11 - tmpA; +blk[6] = tmp13 - tmp9; +blk[7] = tmp14 - tmp8; +} + +static inline void idct_col(int16_t *blk) +{ +int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmpA; +int tmpB, tmpC, tmpD, tmpE, tmpF, tmp10, tmp11, tmp12, tmp13, tmp14; + +tmp0 = blk[5 * 8] - blk[3 * 8]; +tmp1 = blk[5 * 8] + blk[3 * 8]; +tmp2 = blk[1 * 8] * 2 - (blk[7 * 8] 2); +tmp3 = blk[1 * 8] * 2 + (blk[7 * 8] 2); +tmp4 = tmp3 - tmp1; +tmp5 = IDCTMUL(tmp0 + tmp2, FIX_1_847); +tmp6 = IDCTMUL(tmp2,FIX_1_082) - tmp5; +tmp7 = tmp5 - IDCTMUL(tmp0, FIX_2_613) * 2; +tmp8 = (tmp3 + tmp1) 1; 
+tmp9 = tmp7 * 2 - tmp8; +tmpA = IDCTMUL(tmp4, FIX_1_414) * 2 - tmp9; +tmpB = tmp6 * 2 + tmpA; +tmpC = blk[2 * 8] + (blk[6 * 8] 1) 1; +tmpD = blk[2 * 8] - (blk[6 * 8] 1); +tmpE = (blk[0 * 8] 1) - (blk[4 * 8] 1) + 0x2020; +tmpF = (blk[0 * 8] 1) + (blk[4 * 8] 1) + 0x2020; + +tmp10 = IDCTMUL(tmpD, FIX_1_414) * 2 - tmpC; +tmp11 = tmpE - tmp10; +tmp12 = tmpF - tmpC; +tmp13 = tmpE + tmp10; +tmp14 = tmpF + tmpC; + +blk[0 * 8] = (tmp14 + tmp8) 6; +blk[1 * 8] = (tmp13 + tmp9) 6; +blk[2 * 8] = (tmp11 + tmpA) 6; +blk[3 * 8] = (tmp12 - tmpB) 6; +blk[4 * 8] = (tmp12 + tmpB) 6; +blk[5 * 8] = (tmp11 - tmpA) 6; +blk[6 * 8] = (tmp13 - tmp9) 6; +blk[7 * 8] = (tmp14 - tmp8) 6; +} + +static void hq_idct_put(uint8_t *dst, int stride, int16_t *block) +{ +int i, j; + +for (i = 0; i 8; i++) +idct_row(block + i * 8); +for (i = 0; i 8; i++) +idct_col(block + i); + +// or use IDCTDSPContext.put_pixels_clamped() Bench and see if it's worth using? There's an optimized version for most platforms after all. +for (i = 0; i 8; i++) { +for (j = 0; j 8; j++) +dst[j] = av_clip_uint8(block[j + i * 8]); +dst += stride; +} +} + +static inline void put_blocks(HQContext *c, AVFrame *pic, + int plane, int x, int y, int ilace, + int16_t *block0, int16_t *block1) +{ +uint8_t *p = pic-data[plane] + x; + +hq_idct_put(p + y * pic-linesize[plane], +pic-linesize[plane] ilace, block0); +hq_idct_put(p + (y + (ilace ? 1 : 8)) * pic-linesize[plane], +pic-linesize[plane] ilace, block1); +} ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/2] avcodec: add libdcadec decoder
On 23/03/15 10:23 AM, Luca Barbato wrote: On 23/03/15 12:45, Hendrik Leppkes wrote: --- configure | 4 + libavcodec/Makefile| 1 + libavcodec/allcodecs.c | 1 + libavcodec/libdcadec.c | 197 + 4 files changed, 203 insertions(+) create mode 100644 libavcodec/libdcadec.c diff --git a/configure b/configure index 3c38a8c..6eaac29 100755 --- a/configure +++ b/configure @@ -184,6 +184,7 @@ External library support: --enable-libcdio enable audio CD grabbing with libcdio --enable-libdc1394 enable IIDC-1394 grabbing using libdc1394 and libraw1394 [no] + --enable-libdcadec enable DCA decoding via libdcadec [no] --enable-libfaac enable AAC encoding via libfaac [no] --enable-libfdk-aac enable AAC de/encoding via libfdk-aac [no] --enable-libfreetype enable libfreetype [no] @@ -1149,6 +1150,7 @@ EXTERNAL_LIBRARY_LIST= libbs2b libcdio libdc1394 +libdcadec libfaac libfdk_aac libfontconfig @@ -2004,6 +2006,7 @@ mpeg4video_parser_select=error_resilience h263dsp mpeg_er mpegvideo qpeldsp vc1_parser_select=mpegvideo startcode vc1_decoder # external libraries +libdcadec_decoder_deps=libdcadec libfaac_encoder_deps=libfaac libfaac_encoder_select=audio_frame_queue libfdk_aac_decoder_deps=libfdk_aac @@ -4206,6 +4209,7 @@ enabled avisynth { { check_header avisynth/avisynth_c.h check_l enabled frei0r { check_header frei0r.h || die ERROR: frei0r.h header not found; } enabled gnutls require_pkg_config gnutls gnutls/gnutls.h gnutls_global_init enabled libbs2brequire_pkg_config libbs2b bs2b.h bs2b_open +enabled libdcadec require libdcadec libdcadec/dca_context.h dcadec_context_create -ldcadec I'll get libdcadec a pkgconf file =p It has one already. https://github.com/foo86/dcadec/commit/1ddd3b5547c33b36093c0786632c1287714252c6 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 01/14] Move AVAudioServiceType enum from lavc to lavu
On 02/05/15 5:22 AM, Luca Barbato wrote: On 02/05/15 06:23, Anton Khirnov wrote: Quoting Vittorio Giovara (2015-05-02 01:17:08) The enum is used by lavc, lavf and lavfi, and it is referenced by lavu, so it semantically belongs to lavu more than any other. This change allows dropping an avcodec.h inclusion from avfilter.h. I would disagree here, since this logic would apply to any side data struct whatsoever. And I don't think they should all be in lavu. libav(meta)data ? =) Might be nice to split libavutil a little so: libavu - mem, basic data types, compat, version machinery libavdata - packet, frame, samples and pixels Sounds like libavcore. libavcomp - compressors libavhash - hashes Might as well just drop all these modules and make libgcrypt a mandatory dependency if it comes to this... I don't think anyone links to lavu exclusively for the crypto modules. A library like this would exist only to be linked against lavc/lavf. This is something I'd like to have soon if nobody is strongly against it. lu ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] dashenc: replace attribute id with contentType for the AdaptationSet element
id should be an integer, not a string. It is also optional, so use contentType instead which is the proper attribute for these values. This fixes an MPD validation error. Signed-off-by: James Almer jamr...@gmail.com --- libavformat/dashenc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c index fc5c823..f228b86 100644 --- a/libavformat/dashenc.c +++ b/libavformat/dashenc.c @@ -503,7 +503,7 @@ static int write_manifest(AVFormatContext *s, int final) } if (c-has_video) { -avio_printf(out, \t\tAdaptationSet id=\video\ segmentAlignment=\true\ bitstreamSwitching=\true\\n); +avio_printf(out, \t\tAdaptationSet contentType=\video\ segmentAlignment=\true\ bitstreamSwitching=\true\\n); for (i = 0; i s-nb_streams; i++) { AVStream *st = s-streams[i]; OutputStream *os = c-streams[i]; @@ -516,7 +516,7 @@ static int write_manifest(AVFormatContext *s, int final) avio_printf(out, \t\t/AdaptationSet\n); } if (c-has_audio) { -avio_printf(out, \t\tAdaptationSet id=\audio\ segmentAlignment=\true\ bitstreamSwitching=\true\\n); +avio_printf(out, \t\tAdaptationSet contentType=\audio\ segmentAlignment=\true\ bitstreamSwitching=\true\\n); for (i = 0; i s-nb_streams; i++) { AVStream *st = s-streams[i]; OutputStream *os = c-streams[i]; -- 2.4.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] avcodec/libx265: use x265 Multi-library Interface to query the API
On 12/05/15 6:00 PM, Luca Barbato wrote: On 11/05/15 17:25, Derek Buitenhuis wrote: From: Gopu Govindaswamy g...@multicorewareinc.com The x265pic.bitDepth is set on encode_frame while I assume that this information should be used at init now. I'm not sure how recent is this api version, I hope it isn't necessary The API as used and required by this patch is not available on any tagged release right now. It will be in x265 1.7. to consider adding a fallback path even if it is easy with a bunch of defines since the signatures look the same beside x264 - ctx-api. I'll edit the subject and make it fit in case it does not tomorrow. Thanks for picking it up. lu ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] avutil: remove pointless bmi1 define
Signed-off-by: James Almer jamr...@gmail.com --- libavutil/cpu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 354d21e..4e8ef61 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -90,8 +90,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX) -#define CPUFLAG_BMI1 (AV_CPU_FLAG_BMI1) -#define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | CPUFLAG_BMI1) +#define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | AV_CPU_FLAG_BMI1) static const AVOption cpuflags_opts[] = { { flags , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = flags }, #if ARCH_PPC @@ -113,7 +112,7 @@ int av_parse_cpu_flags(const char *s) { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 },.unit = flags }, { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 },.unit = flags }, { avx2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2 },.unit = flags }, -{ bmi1, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI1 },.unit = flags }, +{ bmi1, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI1 },.unit = flags }, { bmi2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI2 },.unit = flags }, { 3dnow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW },.unit = flags }, { 3dnowext, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT },.unit = flags }, -- 2.3.5 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available
On 19/05/15 7:49 AM, Vittorio Giovara wrote: --- Another set of eyes for the pixel format mapping would be welcome. Vittorio libavcodec/libvpx.c| 26 ++ libavcodec/libvpx.h| 2 ++ libavcodec/libvpxdec.c | 4 ++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c index 20f4484..5adad66 100644 --- a/libavcodec/libvpx.c +++ b/libavcodec/libvpx.c @@ -33,3 +33,29 @@ int ff_vp9_check_experimental(AVCodecContext *avctx) } return 0; } + +enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img) +{ +switch(img) { +case VPX_IMG_FMT_RGB24: return AV_PIX_FMT_RGB24; +case VPX_IMG_FMT_RGB565:return AV_PIX_FMT_RGB565BE; +case VPX_IMG_FMT_RGB555:return AV_PIX_FMT_RGB555BE; +case VPX_IMG_FMT_UYVY: return AV_PIX_FMT_UYVY422; +case VPX_IMG_FMT_YUY2: return AV_PIX_FMT_YUYV422; +case VPX_IMG_FMT_YVYU: return AV_PIX_FMT_YVYU422; +case VPX_IMG_FMT_BGR24: return AV_PIX_FMT_BGR24; +case VPX_IMG_FMT_ARGB: return AV_PIX_FMT_ARGB; +case VPX_IMG_FMT_ARGB_LE: return AV_PIX_FMT_BGRA; +case VPX_IMG_FMT_RGB565_LE: return AV_PIX_FMT_RGB565LE; +case VPX_IMG_FMT_RGB555_LE: return AV_PIX_FMT_RGB555LE; +case VPX_IMG_FMT_I420: return AV_PIX_FMT_YUV420P; vp8 supports only this one. Every other pix_fmt is vp9 only and should be guarded by a CONFIG_LIBVPX_VP9_DECODER preprocessor check. +case VPX_IMG_FMT_I422: return AV_PIX_FMT_YUV422P; +case VPX_IMG_FMT_I444: return AV_PIX_FMT_YUV444P; +case VPX_IMG_FMT_I440: return AV_PIX_FMT_YUV440P; This was added starting with libvpx 1.4.0. It will fail to compile with any prior version. A quick preprocessor check to make sure this define is available is VPX_IMAGE_ABI_VERSION = 3 +case VPX_IMG_FMT_444A: return AV_PIX_FMT_YUVA444P; +case VPX_IMG_FMT_I42016:return AV_PIX_FMT_YUV420P16BE; +case VPX_IMG_FMT_I42216:return AV_PIX_FMT_YUV422P16BE; +case VPX_IMG_FMT_I44416:return AV_PIX_FMT_YUV444P16BE; Likewise, these three were added with libvpx 1.4.0. 
Checking for VPX_IMG_FMT_HIGHBITDEPTH should suffice here, or alternatively, the same abi version check as above if git snapshots before 1.4.0 was tagged are not important. And the value of img-bit_depth should probably be checked instead and these high bitdepth pix_fmts set accordingly. +default:return AV_PIX_FMT_NONE; +} +} diff --git a/libavcodec/libvpx.h b/libavcodec/libvpx.h index cb1ed09..79a05f4 100644 --- a/libavcodec/libvpx.h +++ b/libavcodec/libvpx.h @@ -25,4 +25,6 @@ int ff_vp9_check_experimental(AVCodecContext *avctx); +enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img); + #endif /* AVCODEC_LIBVPX_H */ diff --git a/libavcodec/libvpxdec.c b/libavcodec/libvpxdec.c index 6052207..a1f9c22 100644 --- a/libavcodec/libvpxdec.c +++ b/libavcodec/libvpxdec.c @@ -56,7 +56,6 @@ static av_cold int vpx_init(AVCodecContext *avctx, return AVERROR(EINVAL); } -avctx-pix_fmt = AV_PIX_FMT_YUV420P; return 0; } @@ -82,7 +81,8 @@ static int vp8_decode(AVCodecContext *avctx, } if ((img = vpx_codec_get_frame(ctx-decoder, iter))) { -if (img-fmt != VPX_IMG_FMT_I420) { +avctx-pix_fmt = ff_vpx_imgfmt_to_pixfmt(img-fmt); +if (avctx-pix_fmt == AV_PIX_FMT_NONE) { av_log(avctx, AV_LOG_ERROR, Unsupported output colorspace (%d)\n, img-fmt); return AVERROR_INVALIDDATA; ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] mpjpegdec: don't try to alloc an AVIOContext when probe is guaranteed to fail
The first check is done without the AVIOContext, so alloc it only if said check succeeds. Signed-off-by: James Almer jamr...@gmail.com --- libavformat/mpjpegdec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libavformat/mpjpegdec.c b/libavformat/mpjpegdec.c index 72891e7..e2a2ece 100644 --- a/libavformat/mpjpegdec.c +++ b/libavformat/mpjpegdec.c @@ -83,13 +83,13 @@ static int mpjpeg_read_probe(AVProbeData *p) char line[128] = { 0 }; int ret = 0; +if (p->buf_size < 2 || p->buf[0] != '-' || p->buf[1] != '-') +return 0; + pb = avio_alloc_context(p->buf, p->buf_size, 0, NULL, NULL, NULL, NULL); if (!pb) return AVERROR(ENOMEM); -if (p->buf_size < 2 || p->buf[0] != '-' || p->buf[1] != '-') -goto end; - while (!pb->eof_reached) { ret = get_line(pb, line, sizeof(line)); if (ret < 0) @@ -101,7 +101,7 @@ static int mpjpeg_read_probe(AVProbeData *p) break; } } -end: + av_free(pb); return ret; -- 2.4.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] Introduce a TextureDSP module
On 02/06/15 8:09 AM, Vittorio Giovara wrote: +/* Alpha compression function */ +static void compress_alpha(uint8_t *dst, ptrdiff_t stride, const uint8_t *block) +{ +int i, j; +int dist, bias, dist4, dist2, bits, mask; +int mn, mx; + +/* Find min/max color */ +mn = mx = block[3]; +for (j = 0; j < 4; j++) { +for (i = 0; i < 4; i++) { +int val = block[3 + i * 4 + j * stride]; +if (val < mn) +mn = val; +else if (val > mx) +mx = val; +} +} + +AV_ZERO64(dst); Documentation for AV_ZERO* says "Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be naturally aligned. They may be implemented using MMX, so emms_c() must be called before using any float code afterwards." Make sure fate passes on x86_32 (targeting anything above i686, which is when AV_ZERO64 is implemented with MMX movq) as you're using float code all around. If it doesn't, then maybe you could bench to see if using AV_WN64 is faster than AV_ZERO64 + emms_c(). ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/2] libvpx: Support all pixel formats available in encoding and decoding
On 11/06/15 11:56 AM, Luca Barbato wrote: @@ -321,8 +321,12 @@ static av_cold int vpx_init(AVCodecContext *avctx, /* 0-3: For non-zero values the encoder increasingly optimizes for reduced complexity playback on low powered devices at the expense of encode quality. */ - if (avctx->profile != FF_PROFILE_UNKNOWN) - enccfg.g_profile = avctx->profile; +if (avctx->profile != FF_PROFILE_UNKNOWN) +enccfg.g_profile = avctx->profile; +else if (avctx->pix_fmt == AV_PIX_FMT_YUV440P) As I said before, profile 0 is 8bit yuv420p. +avctx->profile = enccfg.g_profile = FF_PROFILE_VP9_0; +else +avctx->profile = enccfg.g_profile = FF_PROFILE_VP9_1; enccfg.g_error_resilient = ctx->error_resilient; ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/2] vpx: Support version 1.3.0
On 11/06/15 11:56 AM, Luca Barbato wrote: --- I tied the supported formats to the ABI version. configure | 12 ++-- libavcodec/libvpx.c| 8 ++-- libavcodec/libvpxenc.c | 6 +- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/configure b/configure index a416dc2..8cb53d2 100755 --- a/configure +++ b/configure @@ -4312,19 +4312,19 @@ enabled libvo_amrwbenc require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_in enabled libvorbis require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg enabled libvpx { enabled libvpx_vp8_decoder { -require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver -lvpx || -die ERROR: libvpx encoder version must be =1.4.0; +require_pkg_config vpx = 1.3.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver || +die ERROR: libvpx encoder version must be = 1.3.0; } enabled libvpx_vp8_encoder { -require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver -lvpx || -die ERROR: libvpx encoder version must be =1.4.0; +require_pkg_config vpx = 1.3.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver || +die ERROR: libvpx encoder version must be = 1.3.0; } enabled libvpx_vp9_decoder { -require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver -lvpx || +require_pkg_config vpx = 1.3.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver || disable libvpx_vp9_decoder; } enabled libvpx_vp9_encoder { -require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver -lvpx || +require_pkg_config vpx = 1.3.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver || disable libvpx_vp9_encoder; Using require_pkg_config() makes configure abort if the check fails, so the disable() calls are dead code. That's why use_pkg_config() and check_pkg_config exist. Also, checking for the decoding/encoding header and the init function is apparently not enough. You need to check for the decoding/encoding interfaces vpx_codec_vp[89]_[cd]x because libvpx can be built without one or more of the four components. 
enabled libvpx require_pkg_config vpx = 1.3.0 vpx/vpx_codec.h vpx_codec_version { enabled libvpx_vp8_decoder { check_pkg_config vpx vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_vp8_dx || disable libvpx_vp8_decoder; } enabled libvpx_vp8_encoder { check_pkg_config vpx vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_vp8_cx || disable libvpx_vp8_encoder; } enabled libvpx_vp9_decoder { check_pkg_config vpx vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_vp9_dx || disable libvpx_vp9_decoder; } enabled libvpx_vp9_encoder { check_pkg_config vpx vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_vp9_cx || disable libvpx_vp9_encoder; } } Updated from the version i posted in a previous email (Which was wrong as it only checked for the header and init function). This will first check for a recent libvpx, then for each component. } } diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c index 603ed13..230bc49 100644 --- a/libavcodec/libvpx.c +++ b/libavcodec/libvpx.c @@ -39,11 +39,13 @@ enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img) case VPX_IMG_FMT_I420: return AV_PIX_FMT_YUV420P; case VPX_IMG_FMT_I422: return AV_PIX_FMT_YUV422P; case VPX_IMG_FMT_I444: return AV_PIX_FMT_YUV444P; -case VPX_IMG_FMT_I440: return AV_PIX_FMT_YUV440P; case VPX_IMG_FMT_444A: return AV_PIX_FMT_YUVA444P; +#ifdef VPX_IMG_FMT_HIGHBITDEPTH +case VPX_IMG_FMT_I440: return AV_PIX_FMT_YUV440P; The correct guard for VPX_IMG_FMT_I440 is VPX_IMAGE_ABI_VERSION =3. libvpx git snapshots post 1.3.0 and pre 1.4.0 may fail because they may define VPX_IMG_FMT_HIGHBITDEPTH but not VPX_IMG_FMT_I440. 
case VPX_IMG_FMT_I42016:return AV_PIX_FMT_YUV420P16BE; case VPX_IMG_FMT_I42216:return AV_PIX_FMT_YUV422P16BE; case VPX_IMG_FMT_I44416:return AV_PIX_FMT_YUV444P16BE; +#endif default:return AV_PIX_FMT_NONE; } } @@ -65,11 +67,13 @@ vpx_img_fmt_t ff_vpx_pixfmt_to_imgfmt(enum AVPixelFormat pix) case AV_PIX_FMT_YUV420P: return VPX_IMG_FMT_I420; case AV_PIX_FMT_YUV422P: return VPX_IMG_FMT_I422; case AV_PIX_FMT_YUV444P: return VPX_IMG_FMT_I444; -case AV_PIX_FMT_YUV440P: return VPX_IMG_FMT_I440; case AV_PIX_FMT_YUVA444P: return VPX_IMG_FMT_444A; +#ifdef VPX_IMG_FMT_HIGHBITDEPTH +case AV_PIX_FMT_YUV440P: return VPX_IMG_FMT_I440; case AV_PIX_FMT_YUV420P16BE: return VPX_IMG_FMT_I42016; case AV_PIX_FMT_YUV422P16BE: return VPX_IMG_FMT_I42216; case AV_PIX_FMT_YUV444P16BE: return VPX_IMG_FMT_I44416;
Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding
On 03/06/15 11:29 AM, Luca Barbato wrote: On 31/05/15 17:01, Luca Barbato wrote: On 27/05/15 22:25, James Almer wrote: Yes, that plus a considerable amount of ifdeffery in the code. It will be ugly, but i also think it's worth keeping compatibility with at least 1.3.0 1.3.0 explodes on 422p that at least in theory should support. (I'm testing all the possible encodings right now). 1.3.0 seems that had been released when not ready, do we really want to support it? lu It works with 420p content (vp8 and vp9). Support for 422p, 440p, 444p and high bit-depth was officially added with 1.4.0. 1.3.0 should reject the latter stuff, but for some reason it doesn't and it encodes garbage. With some ifdeffery and static init magic both the decoder and encoder can be limited to 420p for vpx 1.3.0. But then again, I guess vpx 1.3.0 is old enough by now that support for it can be safely dropped. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] configure: don't enable tls protocols if network is disabled
This was a regression introduced with d8ffb2055f0e0fcb5d025bab72eb19c2a886c125. Signed-off-by: James Almer jamr...@gmail.com --- configure | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure b/configure index cdc5a8d..a29cd38 100755 --- a/configure +++ b/configure @@ -2216,6 +2216,8 @@ srtp_protocol_select=rtp_protocol tcp_protocol_select=network tls_gnutls_protocol_deps=gnutls tls_openssl_protocol_deps=openssl !tls_gnutls_protocol +tls_gnutls_protocol_select=tcp_protocol +tls_openssl_protocol_select=tcp_protocol tls_protocol_deps_any=tls_gnutls_protocol tls_openssl_protocol tls_protocol_select=tcp_protocol udp_protocol_select=network -- 2.4.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding
On 27/05/15 1:15 PM, Vittorio Giovara wrote: Bump the minimum libvpx version to 1.4.0 so that all pixel formats are present. Add new VP9 profiles. Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com --- Modified as requested. Vittorio configure | 23 +++-- libavcodec/avcodec.h | 4 libavcodec/libvpx.c| 56 ++ libavcodec/libvpx.h| 3 ++- libavcodec/libvpxdec.c | 7 ++- libavcodec/libvpxenc.c | 32 + libavcodec/version.h | 2 +- 7 files changed, 97 insertions(+), 30 deletions(-) diff --git a/configure b/configure index 863e33b..e26fc54 100755 --- a/configure +++ b/configure @@ -4274,12 +4274,23 @@ enabled libvo_aacenc require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncA enabled libvo_amrwbenc require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc enabled libvorbis require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg enabled libvpx { -enabled libvpx_vp8_decoder { check_lib2 vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_dec_init_ver -lvpx || -die ERROR: libvpx decoder version must be =0.9.1; } -enabled libvpx_vp8_encoder { check_lib2 vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_enc_init_ver VPX_CQ -lvpx || -die ERROR: libvpx encoder version must be =0.9.6; } -enabled libvpx_vp9_decoder { check_lib2 vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_vp9_dx -lvpx || disable libvpx_vp9_decoder; } -enabled libvpx_vp9_encoder { check_lib2 vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_vp9_cx -lvpx || disable libvpx_vp9_encoder; } } +enabled libvpx_vp8_decoder { +require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver -lvpx || +die ERROR: libvpx encoder version must be =1.4.0; As Luca said, require_pkg_config. If you use require, vpx = 1.4.0 is just used as a name to report a failure. It does not check for that version. Also, all require functions terminate configure with an error if the check fails, so these custom die calls are dead code. If you want to use your own custom error, use use_pkg_config instead. 
+} +enabled libvpx_vp8_encoder { +require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver -lvpx || +die ERROR: libvpx encoder version must be =1.4.0; +} +enabled libvpx_vp9_decoder { +require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver -lvpx || +disable libvpx_vp9_decoder; +} +enabled libvpx_vp9_encoder { +require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver -lvpx || +disable libvpx_vp9_encoder; +} +} enabled libwavpack require libwavpack wavpack/wavpack.h WavpackOpenFileOutput -lwavpack enabled libwebprequire_pkg_config libwebp webp/encode.h WebPGetEncoderVersion enabled libx264require_pkg_config x264 stdint.h x264.h x264_encoder_encode diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 3440126..16af20c 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2702,6 +2702,10 @@ typedef struct AVCodecContext { #define FF_PROFILE_JPEG2000_DCINEMA_2K 3 #define FF_PROFILE_JPEG2000_DCINEMA_4K 4 +#define FF_PROFILE_VP9_00 +#define FF_PROFILE_VP9_11 +#define FF_PROFILE_VP9_22 +#define FF_PROFILE_VP9_33 Even if it's a simple change, credit where credit is due would be nice. 
#define FF_PROFILE_HEVC_MAIN1 #define FF_PROFILE_HEVC_MAIN_10 2 diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c index 20f4484..603ed13 100644 --- a/libavcodec/libvpx.c +++ b/libavcodec/libvpx.c @@ -22,14 +22,54 @@ #include libvpx.h -int ff_vp9_check_experimental(AVCodecContext *avctx) +enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img) { -if (avctx-strict_std_compliance FF_COMPLIANCE_EXPERIMENTAL -(vpx_codec_version_major() 1 || - (vpx_codec_version_major() == 1 vpx_codec_version_minor() 3))) { -av_log(avctx, AV_LOG_ERROR, - Non-experimental support of VP9 requires libvpx = 1.3.0\n); -return AVERROR_EXPERIMENTAL; +switch (img) { +case VPX_IMG_FMT_RGB24: return AV_PIX_FMT_RGB24; +case VPX_IMG_FMT_RGB565:return AV_PIX_FMT_RGB565BE; +case VPX_IMG_FMT_RGB555:return AV_PIX_FMT_RGB555BE; +case VPX_IMG_FMT_UYVY: return AV_PIX_FMT_UYVY422; +case VPX_IMG_FMT_YUY2: return AV_PIX_FMT_YUYV422; +case VPX_IMG_FMT_YVYU: return AV_PIX_FMT_YVYU422; +case VPX_IMG_FMT_BGR24: return AV_PIX_FMT_BGR24; +case VPX_IMG_FMT_ARGB: return
Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding
On 27/05/15 3:24 PM, James Almer wrote: On 27/05/15 1:15 PM, Vittorio Giovara wrote: Bump the minimum libvpx version to 1.4.0 so that all pixel formats are present. Add new VP9 profiles. Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com --- Modified as requested. Vittorio configure | 23 +++-- libavcodec/avcodec.h | 4 libavcodec/libvpx.c| 56 ++ libavcodec/libvpx.h| 3 ++- libavcodec/libvpxdec.c | 7 ++- libavcodec/libvpxenc.c | 32 + libavcodec/version.h | 2 +- 7 files changed, 97 insertions(+), 30 deletions(-) diff --git a/configure b/configure index 863e33b..e26fc54 100755 --- a/configure +++ b/configure @@ -4274,12 +4274,23 @@ enabled libvo_aacenc require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncA enabled libvo_amrwbenc require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc enabled libvorbis require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg enabled libvpx { -enabled libvpx_vp8_decoder { check_lib2 vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_dec_init_ver -lvpx || -die ERROR: libvpx decoder version must be =0.9.1; } -enabled libvpx_vp8_encoder { check_lib2 vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_enc_init_ver VPX_CQ -lvpx || -die ERROR: libvpx encoder version must be =0.9.6; } -enabled libvpx_vp9_decoder { check_lib2 vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_vp9_dx -lvpx || disable libvpx_vp9_decoder; } -enabled libvpx_vp9_encoder { check_lib2 vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_vp9_cx -lvpx || disable libvpx_vp9_encoder; } } +enabled libvpx_vp8_decoder { +require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver -lvpx || +die ERROR: libvpx encoder version must be =1.4.0; As Luca said, require_pkg_config. If you use require, vpx = 1.4.0 is just used as a name to report a failure. It does not check for that version. Also, all require functions terminate configure with an error if the check fails, so these custom die calls are dead code. If you want to use your own custom error, use use_pkg_config instead. 
Also, since 1.4.0 is the minimum required version now, you can simplify all this into enabled libvpx require_pkg_config vpx = 1.4.0 vpx/vpx_codec.h vpx_codec_version { enabled_any libvpx_vp8_decoder libvpx_vp9_decoder { check_pkg_config vpx vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_dec_init_ver || disable libvpx_vp8_decoder libvpx_vp9_decoder; } enabled_any libvpx_vp8_encoder libvpx_vp9_encoder { check_pkg_config vpx vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_enc_init_ver || disable libvpx_vp8_encoder libvpx_vp9_encoder; } } Which will check for libvpx 1.4.0 first, then for the decoder and encoding headers depending on enabled components. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding
On 27/05/15 5:04 PM, Martin Storsjö wrote: On Wed, 27 May 2015, Vittorio Giovara wrote: Bump the minimum libvpx version to 1.4.0 so that all pixel formats are present. Add new VP9 profiles. Sorry to be a bit late to the party, but how bad would it be to keep compat with older versions? Was there any other argument for dropping older versions than because we can, and x265 did it? Allowing people to build with the earlier versions with the reduced (old/existing) featureset is something that I'd appreciate. I think x265 might have been a bit special case since that involved a bigger API change than this, to the point that keeping compat would be uglier? Or would it require some ugly static initialization of the pixfmt list? In that case I guess it can be argued that it's simpler just to bump the requirement. Yes, that plus a considerable amount of ifdeffery in the code. It will be ugly, but i also think it's worth keeping compatibility with at least 1.3.0 // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH v2 1/3] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag
On 26/05/15 2:29 PM, James Almer wrote: Signed-off-by: James Almer jamr...@gmail.com --- No changes from last revision. doc/APIchanges | 3 +++ libavutil/cpu.c | 3 +++ libavutil/cpu.h | 1 + libavutil/version.h | 4 ++-- libavutil/x86/cpu.c | 17 ++--- 5 files changed, 23 insertions(+), 5 deletions(-) Ping ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding
On 28/05/15 9:11 AM, Vittorio Giovara wrote: On Wed, May 27, 2015 at 7:24 PM, James Almer jamr...@gmail.com wrote: As Luca said, require_pkg_config. If you use require, vpx = 1.4.0 is just used as a name to report a failure. It does not check for that version. I swear I couldn't get it working with just require_pkg_config, thanks for showing how to do that in the next email. For the record, the example i gave in the other email is only valid if the libvpx requirement is bumped to 1.3.0 or newer. Otherwise, individual tests for each component (like it's done right now) will still be needed. diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 3440126..16af20c 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2702,6 +2702,10 @@ typedef struct AVCodecContext { #define FF_PROFILE_JPEG2000_DCINEMA_2K 3 #define FF_PROFILE_JPEG2000_DCINEMA_4K 4 +#define FF_PROFILE_VP9_00 +#define FF_PROFILE_VP9_11 +#define FF_PROFILE_VP9_22 +#define FF_PROFILE_VP9_33 Even if it's a simple change, credit where credit is due would be nice. credit to whom and for what? https://git.videolan.org/?p=ffmpeg.git;a=commitdiff;h=079b7f6eacc09bc2813fc1ddc230ab05022b69c2 https://git.videolan.org/?p=ffmpeg.git;a=commitdiff;h=01e59d48ed1a41b88107ed1d4d56ae0cbcd1a60e ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] configure: don't enable tls protocols if network is disabled
This was a regression introduced with d8ffb2055f0e0fcb5d025bab72eb19c2a886c125. Signed-off-by: James Almer jamr...@gmail.com --- configure | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 30d6f18..2458adb 100755 --- a/configure +++ b/configure @@ -2215,9 +2215,10 @@ sctp_protocol_select=network srtp_protocol_select=rtp_protocol tcp_protocol_select=network tls_gnutls_protocol_deps=gnutls +tls_gnutls_protocol_select=tcp_protocol tls_openssl_protocol_deps=openssl !tls_gnutls_protocol +tls_openssl_protocol_select=tcp_protocol tls_protocol_deps_any=tls_gnutls_protocol tls_openssl_protocol -tls_protocol_select=tcp_protocol udp_protocol_select=network unix_protocol_deps=sys_un_h unix_protocol_select=network -- 2.4.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/4] configure: Require LPDIRECT3DSURFACE9 for dxva2
On 02/06/15 4:27 AM, Martin Storsjö wrote: On Mon, 1 Jun 2015, James Almer wrote: On 01/06/15 7:54 AM, Martin Storsjö wrote: This fixes dxva2 detection (i.e. correctly realizes that it isn't available) for WinRT, where dxva2api.h does exist, but these definitions are omitted (when targeting the API subsets). Ideally we should rather check for e.g. DXVA2_ConfigPictureDecode, but configure might fail to find that definition due to _WIN32_WINNT not being set to the right value during configure. (libavcodec/dxva2.h manually overrides the _WIN32_WINNT define.) Something like enabled dxva2api_h check_type dxva2api.h DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0600 || disable dxva2api_h Thanks - I somehow missed that check_type can take other parameters to use while compiling. Should work then. You can put it above the d3d11_cobj check (Which IMO should be removed alongside the d3d11va_lib check until actual d3d11 support is added to avconv, for that matter). I see this patch was committed already, so up to you if you prefer the above solution or not. This does sound better indeed (and I agree about removing the extra d3d11 things for avconv support which isn't there yet). Although I think it's a bit more straightforward to just add this as an unconditional check_type call without intermixing it with enabling/disabling dxva2api_h though. Yeah, i realized after sending that email that you can probably just replace the check_type for LPDIRECT3DSURFACE9 with this one, and of course also the relevant dependency on dxva2_deps. // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] D3D11va: add a Direct3D11 video decoder similar to DXVA2
On 25/05/15 2:31 AM, Steve Lhomme wrote: On Sun, May 24, 2015 at 1:13 PM, Luca Barbato lu_z...@gentoo.org wrote: On 24/05/15 07:59, Steve Lhomme wrote: Any update on this patch ? If it works for you I'll merge it Monday. I do not have mean to test it directly I guess. Yes, it works. Building may be tricky until my patches are merged into wine mingw-w64, unless you build with the Microsoft SDK. In addition to the mingw-w64 breakage i mentioned in another thread, this is making the h264, hevc and other fate tests fail on msvc x86_32. https://fate.libav.org/x86_32-msvc11-windows-native/20150525152900 https://fate.libav.org/x86_32-msvc12-windows-native/20150525155646 msvc x86_64 seems unaffected. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: we don't need d3d11va_lib as avconv doesn't support it
On 25/05/15 11:49 AM, Steve Lhomme wrote: --- configure | 5 - 1 file changed, 5 deletions(-) diff --git a/configure b/configure index 18280b9..a9ecad1 100755 --- a/configure +++ b/configure @@ -1555,7 +1555,6 @@ HAVE_LIST= atomics_native dos_paths d3d11_cobj -d3d11va_lib dxva2_lib libc_msvcrt libdc1394_1 @@ -4618,10 +4617,6 @@ check_deps $CONFIG_LIST \ $HAVE_LIST \ $ALL_COMPONENTS\ -enabled_all d3d11va d3d11_cobj CoTaskMemFree -prepend avconv_libs $($ldflags_filter -lole32) -enable d3d11va_lib - enabled_all dxva2 CoTaskMemFree prepend avconv_libs $($ldflags_filter -lole32) enable dxva2_lib You could also remove d3d11_cobj and its configure check, then. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: we don't need d3d11va_lib as avconv doesn't support it
On 25/05/15 8:24 PM, James Almer wrote: On 25/05/15 11:49 AM, Steve Lhomme wrote: --- configure | 5 - 1 file changed, 5 deletions(-) diff --git a/configure b/configure index 18280b9..a9ecad1 100755 --- a/configure +++ b/configure @@ -1555,7 +1555,6 @@ HAVE_LIST= atomics_native dos_paths d3d11_cobj -d3d11va_lib dxva2_lib libc_msvcrt libdc1394_1 @@ -4618,10 +4617,6 @@ check_deps $CONFIG_LIST \ $HAVE_LIST \ $ALL_COMPONENTS\ -enabled_all d3d11va d3d11_cobj CoTaskMemFree -prepend avconv_libs $($ldflags_filter -lole32) -enable d3d11va_lib - enabled_all dxva2 CoTaskMemFree prepend avconv_libs $($ldflags_filter -lole32) enable dxva2_lib You could also remove d3d11_cobj and its configure check, then. Actually no, don't remove the d3d11_cobj check. Repurpose it as it's the only check that actually makes sure things will work: https://fate.libav.org/x86_64-mingw-w64-gcc-5.1/20150525105137/compile CONFIG_D3D11VA, currently checked in libavcodec, is true if d3d11.h and dxva.h exist, but their existence doesn't mean the needed functionality is there, as shown in the above FATE client using a recent mingw-w64 version. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag
Signed-off-by: James Almer jamr...@gmail.com --- Updated with a new libavutil version after the d3d11 patch. doc/APIchanges | 3 +++ libavutil/cpu.c | 3 +++ libavutil/cpu.h | 1 + libavutil/version.h | 4 ++-- libavutil/x86/cpu.c | 17 ++--- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 5d39ec6..2c443b0 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,6 +13,9 @@ libavutil: 2014-08-09 API changes, most recent first: +2015-xx-xx - xxx - lavu 54.14.0 - cpu.h + Add AV_CPU_FLAG_AVXSLOW. + 2015-xx-xx - xxx - lavc 56.23.0 Add av_vda_default_init2. diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 4e8ef61..e24b9dd 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3) #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) +#define CPUFLAG_AVXSLOW (AV_CPU_FLAG_AVXSLOW | CPUFLAG_AVX) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) @@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s) { sse4.1 , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4 },.unit = flags }, { sse4.2 , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 },.unit = flags }, { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX },.unit = flags }, +{ avxslow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW },.unit = flags }, { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP },.unit = flags }, { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 },.unit = flags }, { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 },.unit = flags }, @@ -219,6 +221,7 @@ static const struct { { AV_CPU_FLAG_SSE4, sse4.1 }, { AV_CPU_FLAG_SSE42, sse4.2 }, { AV_CPU_FLAG_AVX, avx}, +{ AV_CPU_FLAG_AVXSLOW, avxslow}, { AV_CPU_FLAG_XOP, xop}, { AV_CPU_FLAG_FMA3, fma3 }, { 
AV_CPU_FLAG_FMA4, fma4 }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 7ce..c9469b3 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -45,6 +45,7 @@ #define AV_CPU_FLAG_SSE4 0x0100 /// Penryn SSE4.1 functions #define AV_CPU_FLAG_SSE420x0200 /// Nehalem SSE4.2 functions #define AV_CPU_FLAG_AVX 0x4000 /// AVX functions: requires OS support even if YMM registers aren't used +#define AV_CPU_FLAG_AVXSLOW 0x800 /// AVX supported, but slow when using YMM registers (e.g. Bulldozer) #define AV_CPU_FLAG_XOP 0x0400 /// Bulldozer XOP functions #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov diff --git a/libavutil/version.h b/libavutil/version.h index 13bb6f0..c3342cd 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -54,8 +54,8 @@ */ #define LIBAVUTIL_VERSION_MAJOR 54 -#define LIBAVUTIL_VERSION_MINOR 13 -#define LIBAVUTIL_VERSION_MICRO 1 +#define LIBAVUTIL_VERSION_MINOR 14 +#define LIBAVUTIL_VERSION_MICRO 0 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 8be6d94..098ccf7 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void) if (ext_caps (1 22)) rval |= AV_CPU_FLAG_MMXEXT; +if (!strncmp(vendor.c, AuthenticAMD, 12)) { /* Allow for selectively disabling SSE2 functions on AMD processors with SSE2 support but not SSE4a. This includes Athlon64, some Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster @@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void) AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case so that SSE2 is used unless explicitly disabled by checking AV_CPU_FLAG_SSE2SLOW. 
*/ -if (!strncmp(vendor.c, AuthenticAMD, 12) -rval AV_CPU_FLAG_SSE2 !(ecx 0x0040)) { -rval |= AV_CPU_FLAG_SSE2SLOW; +if (rval AV_CPU_FLAG_SSE2 !(ecx 0x0040)) +rval |= AV_CPU_FLAG_SSE2SLOW; + +/* Similar to the above but for AVX functions on AMD processors. + This is necessary only for functions using YMM registers on Bulldozer + based CPUs as they lack 256-bits execution units. SSE/AVX functions + using XMM registers are always faster on them. + AV_CPU_FLAG_AVX
[libav-devel] [PATCH 2/2] x86: check for AV_CPU_FLAG_AVXSLOW where useful
Signed-off-by: James Almer jamr...@gmail.com --- The FMA4 functions from libavresample's audio_mix need to be handled differently. Disabling them if avxslow is true is pointless since no CPU out there currently has FMA4 and a fast float execution unit. So I'm thinking about duplicating them and doing: FMA3 YMM/XMM for current Intel CPUs (Basically, renaming the existing functions) FMA4 XMM for current AMD stuff (Regardless of x86_32 or x86_64). I'll see about implementing that in the coming days. libavcodec/x86/dcadsp_init.c | 4 ++-- libavcodec/x86/dct_init.c | 2 +- libavcodec/x86/fft_init.c | 2 +- libavfilter/x86/af_volume_init.c | 2 +- libavresample/x86/audio_convert_init.c | 10 ++ libavresample/x86/audio_mix_init.c | 10 ++ libavresample/x86/dither_init.c| 4 ++-- libavutil/x86/float_dsp_init.c | 2 +- libavutil/x86/lls_init.c | 2 +- 9 files changed, 21 insertions(+), 17 deletions(-) diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 9acb818..8deb6d6 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -98,10 +98,10 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) if (EXTERNAL_SSE2(cpu_flags)) { s-synth_filter_float = synth_filter_sse2; } -if (EXTERNAL_AVX(cpu_flags)) { +if (EXTERNAL_AVX(cpu_flags) !(cpu_flags AV_CPU_FLAG_AVXSLOW)) { s-synth_filter_float = synth_filter_avx; } -if (EXTERNAL_FMA3(cpu_flags)) { +if (EXTERNAL_FMA3(cpu_flags) !(cpu_flags AV_CPU_FLAG_AVXSLOW)) { s-synth_filter_float = synth_filter_fma3; } #endif /* HAVE_YASM */ diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c index 7bda5e8..660d118 100644 --- a/libavcodec/x86/dct_init.c +++ b/libavcodec/x86/dct_init.c @@ -34,6 +34,6 @@ av_cold void ff_dct_init_x86(DCTContext *s) s-dct32 = ff_dct32_float_sse; if (EXTERNAL_SSE2(cpu_flags)) s-dct32 = ff_dct32_float_sse2; -if (EXTERNAL_AVX(cpu_flags)) +if (EXTERNAL_AVX(cpu_flags) !(cpu_flags AV_CPU_FLAG_AVXSLOW)) s-dct32 = ff_dct32_float_avx; } diff --git 
a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c index 7ca72c5..840f348 100644 --- a/libavcodec/x86/fft_init.c +++ b/libavcodec/x86/fft_init.c @@ -48,7 +48,7 @@ av_cold void ff_fft_init_x86(FFTContext *s) s-fft_calc= ff_fft_calc_sse; s-fft_permutation = FF_FFT_PERM_SWAP_LSBS; } -if (EXTERNAL_AVX(cpu_flags) s-nbits = 5) { +if (EXTERNAL_AVX(cpu_flags) !(cpu_flags AV_CPU_FLAG_AVXSLOW) s-nbits = 5) { /* AVX for SB */ s-imdct_half = ff_imdct_half_avx; s-fft_calc= ff_fft_calc_avx; diff --git a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c index c59e0ed..f70bafa 100644 --- a/libavfilter/x86/af_volume_init.c +++ b/libavfilter/x86/af_volume_init.c @@ -52,7 +52,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol) vol-scale_samples = ff_scale_samples_s32_ssse3_atom; vol-samples_align = 4; } -if (EXTERNAL_AVX(cpu_flags)) { +if (EXTERNAL_AVX(cpu_flags) !(cpu_flags AV_CPU_FLAG_AVXSLOW)) { vol-scale_samples = ff_scale_samples_s32_avx; vol-samples_align = 8; } diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index d85ca84..1aab0f7 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -227,10 +227,12 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 6, 16, 4, SSE4, ff_conv_fltp_to_flt_6ch_sse4); } if (EXTERNAL_AVX(cpu_flags)) { -ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, - 0, 32, 16, AVX, ff_conv_s32_to_flt_avx); -ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, - 0, 32, 32, AVX, ff_conv_flt_to_s32_avx); +if (!(cpu_flags AV_CPU_FLAG_AVXSLOW)) { +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, + 0, 32, 16, AVX, ff_conv_s32_to_flt_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, + 0, 32, 32, AVX, ff_conv_flt_to_s32_avx); +} ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 2, 16, 16, AVX, ff_conv_s16p_to_s16_2ch_avx); 
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, diff --git a/libavresample/x86/audio_mix_init.c b/libavresample/x86/audio_mix_init.c index 7fc530e..4fc2749 100644 --- a/libavresample/x86/audio_mix_init.c +++ b/libavresample/x86/audio_mix_init.c @@ -196,10 +196,12 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am
[libav-devel] [PATCH] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag
Signed-off-by: James Almer jamr...@gmail.com --- doc/APIchanges | 3 +++ libavutil/cpu.c | 3 +++ libavutil/cpu.h | 1 + libavutil/version.h | 4 ++-- libavutil/x86/cpu.c | 17 ++--- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 5d39ec6..b126364 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,6 +13,9 @@ libavutil: 2014-08-09 API changes, most recent first: +2015-xx-xx - xxx - lavu 54.13.0 - cpu.h + Add AV_CPU_FLAG_AVXSLOW. + 2015-xx-xx - xxx - lavc 56.23.0 Add av_vda_default_init2. diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 4e8ef61..e24b9dd 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3) #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) +#define CPUFLAG_AVXSLOW (AV_CPU_FLAG_AVXSLOW | CPUFLAG_AVX) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) @@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s) { sse4.1 , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4 },.unit = flags }, { sse4.2 , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 },.unit = flags }, { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX },.unit = flags }, +{ avxslow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW },.unit = flags }, { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP },.unit = flags }, { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 },.unit = flags }, { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 },.unit = flags }, @@ -219,6 +221,7 @@ static const struct { { AV_CPU_FLAG_SSE4, sse4.1 }, { AV_CPU_FLAG_SSE42, sse4.2 }, { AV_CPU_FLAG_AVX, avx}, +{ AV_CPU_FLAG_AVXSLOW, avxslow}, { AV_CPU_FLAG_XOP, xop}, { AV_CPU_FLAG_FMA3, fma3 }, { AV_CPU_FLAG_FMA4, fma4 }, diff --git a/libavutil/cpu.h 
b/libavutil/cpu.h index 7ce..c9469b3 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -45,6 +45,7 @@ #define AV_CPU_FLAG_SSE4 0x0100 /// Penryn SSE4.1 functions #define AV_CPU_FLAG_SSE420x0200 /// Nehalem SSE4.2 functions #define AV_CPU_FLAG_AVX 0x4000 /// AVX functions: requires OS support even if YMM registers aren't used +#define AV_CPU_FLAG_AVXSLOW 0x800 /// AVX supported, but slow when using YMM registers (e.g. Bulldozer) #define AV_CPU_FLAG_XOP 0x0400 /// Bulldozer XOP functions #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov diff --git a/libavutil/version.h b/libavutil/version.h index 9c45e0e..378f7b7 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -54,8 +54,8 @@ */ #define LIBAVUTIL_VERSION_MAJOR 54 -#define LIBAVUTIL_VERSION_MINOR 12 -#define LIBAVUTIL_VERSION_MICRO 1 +#define LIBAVUTIL_VERSION_MINOR 13 +#define LIBAVUTIL_VERSION_MICRO 0 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 8be6d94..098ccf7 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void) if (ext_caps (1 22)) rval |= AV_CPU_FLAG_MMXEXT; +if (!strncmp(vendor.c, AuthenticAMD, 12)) { /* Allow for selectively disabling SSE2 functions on AMD processors with SSE2 support but not SSE4a. This includes Athlon64, some Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster @@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void) AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case so that SSE2 is used unless explicitly disabled by checking AV_CPU_FLAG_SSE2SLOW. */ -if (!strncmp(vendor.c, AuthenticAMD, 12) -rval AV_CPU_FLAG_SSE2 !(ecx 0x0040)) { -rval |= AV_CPU_FLAG_SSE2SLOW; +if (rval AV_CPU_FLAG_SSE2 !(ecx 0x0040)) +rval |= AV_CPU_FLAG_SSE2SLOW; + +/* Similar to the above but for AVX functions on AMD processors. 
+ This is necessary only for functions using YMM registers on Bulldozer + based CPUs as they lack 256-bits execution units. SSE/AVX functions + using XMM registers are always faster on them. + AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is + used
[libav-devel] [PATCH v2 3/3] x86: check for AV_CPU_FLAG_AVXSLOW where useful
Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp_init.c | 4 ++-- libavcodec/x86/dct_init.c | 2 +- libavcodec/x86/fft_init.c | 2 +- libavfilter/x86/af_volume_init.c | 2 +- libavresample/x86/audio_convert_init.c | 4 +++- libavresample/x86/audio_mix_init.c | 4 +++- libavresample/x86/dither_init.c| 4 ++-- libavutil/x86/float_dsp_init.c | 2 +- libavutil/x86/lls_init.c | 2 +- 9 files changed, 15 insertions(+), 11 deletions(-) diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 9acb818..7c2bec1 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -98,10 +98,10 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) if (EXTERNAL_SSE2(cpu_flags)) { s-synth_filter_float = synth_filter_sse2; } -if (EXTERNAL_AVX(cpu_flags)) { +if (EXTERNAL_AVX_FAST(cpu_flags)) { s-synth_filter_float = synth_filter_avx; } -if (EXTERNAL_FMA3(cpu_flags)) { +if (EXTERNAL_FMA3(cpu_flags) !(cpu_flags AV_CPU_FLAG_AVXSLOW)) { s-synth_filter_float = synth_filter_fma3; } #endif /* HAVE_YASM */ diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c index 7bda5e8..ca9fbc7 100644 --- a/libavcodec/x86/dct_init.c +++ b/libavcodec/x86/dct_init.c @@ -34,6 +34,6 @@ av_cold void ff_dct_init_x86(DCTContext *s) s-dct32 = ff_dct32_float_sse; if (EXTERNAL_SSE2(cpu_flags)) s-dct32 = ff_dct32_float_sse2; -if (EXTERNAL_AVX(cpu_flags)) +if (EXTERNAL_AVX_FAST(cpu_flags)) s-dct32 = ff_dct32_float_avx; } diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c index 7ca72c5..5c0273d 100644 --- a/libavcodec/x86/fft_init.c +++ b/libavcodec/x86/fft_init.c @@ -48,7 +48,7 @@ av_cold void ff_fft_init_x86(FFTContext *s) s-fft_calc= ff_fft_calc_sse; s-fft_permutation = FF_FFT_PERM_SWAP_LSBS; } -if (EXTERNAL_AVX(cpu_flags) s-nbits = 5) { +if (EXTERNAL_AVX_FAST(cpu_flags) s-nbits = 5) { /* AVX for SB */ s-imdct_half = ff_imdct_half_avx; s-fft_calc= ff_fft_calc_avx; diff --git a/libavfilter/x86/af_volume_init.c 
b/libavfilter/x86/af_volume_init.c index c59e0ed..26605fb 100644 --- a/libavfilter/x86/af_volume_init.c +++ b/libavfilter/x86/af_volume_init.c @@ -52,7 +52,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol) vol-scale_samples = ff_scale_samples_s32_ssse3_atom; vol-samples_align = 4; } -if (EXTERNAL_AVX(cpu_flags)) { +if (EXTERNAL_AVX_FAST(cpu_flags)) { vol-scale_samples = ff_scale_samples_s32_avx; vol-samples_align = 8; } diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index d85ca84..ae6c319 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -226,11 +226,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 16, 4, SSE4, ff_conv_fltp_to_flt_6ch_sse4); } -if (EXTERNAL_AVX(cpu_flags)) { +if (EXTERNAL_AVX_FAST(cpu_flags)) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, 0, 32, 16, AVX, ff_conv_s32_to_flt_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, 0, 32, 32, AVX, ff_conv_flt_to_s32_avx); +} +if (EXTERNAL_AVX(cpu_flags)) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 2, 16, 16, AVX, ff_conv_s16p_to_s16_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, diff --git a/libavresample/x86/audio_mix_init.c b/libavresample/x86/audio_mix_init.c index 7fc530e..e14a540 100644 --- a/libavresample/x86/audio_mix_init.c +++ b/libavresample/x86/audio_mix_init.c @@ -195,11 +195,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, 1, 2, 16, 8, SSE4, ff_mix_1_to_2_s16p_flt_sse4); } -if (EXTERNAL_AVX(cpu_flags)) { +if (EXTERNAL_AVX_FAST(cpu_flags)) { ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, 2, 1, 32, 16, AVX, ff_mix_2_to_1_fltp_flt_avx); ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, 1, 2, 32, 
8, AVX, ff_mix_1_to_2_fltp_flt_avx); +} +if (EXTERNAL_AVX(cpu_flags)) { ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, 1, 2, 16, 8, AVX, ff_mix_1_to_2_s16p_flt_avx); } diff --git a/libavresample/x86/dither_init.c b/libavresample/x86
[libav-devel] [PATCH v2 1/3] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag
Signed-off-by: James Almer jamr...@gmail.com --- No changes from last revision. doc/APIchanges | 3 +++ libavutil/cpu.c | 3 +++ libavutil/cpu.h | 1 + libavutil/version.h | 4 ++-- libavutil/x86/cpu.c | 17 ++--- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 5d39ec6..2c443b0 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,6 +13,9 @@ libavutil: 2014-08-09 API changes, most recent first: +2015-xx-xx - xxx - lavu 54.14.0 - cpu.h + Add AV_CPU_FLAG_AVXSLOW. + 2015-xx-xx - xxx - lavc 56.23.0 Add av_vda_default_init2. diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 4e8ef61..e24b9dd 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3) #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) +#define CPUFLAG_AVXSLOW (AV_CPU_FLAG_AVXSLOW | CPUFLAG_AVX) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) @@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s) { sse4.1 , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4 },.unit = flags }, { sse4.2 , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 },.unit = flags }, { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX },.unit = flags }, +{ avxslow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW },.unit = flags }, { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP },.unit = flags }, { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 },.unit = flags }, { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 },.unit = flags }, @@ -219,6 +221,7 @@ static const struct { { AV_CPU_FLAG_SSE4, sse4.1 }, { AV_CPU_FLAG_SSE42, sse4.2 }, { AV_CPU_FLAG_AVX, avx}, +{ AV_CPU_FLAG_AVXSLOW, avxslow}, { AV_CPU_FLAG_XOP, xop}, { AV_CPU_FLAG_FMA3, fma3 }, { AV_CPU_FLAG_FMA4, fma4 }, diff 
--git a/libavutil/cpu.h b/libavutil/cpu.h index 7ce..c9469b3 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -45,6 +45,7 @@ #define AV_CPU_FLAG_SSE4 0x0100 /// Penryn SSE4.1 functions #define AV_CPU_FLAG_SSE420x0200 /// Nehalem SSE4.2 functions #define AV_CPU_FLAG_AVX 0x4000 /// AVX functions: requires OS support even if YMM registers aren't used +#define AV_CPU_FLAG_AVXSLOW 0x800 /// AVX supported, but slow when using YMM registers (e.g. Bulldozer) #define AV_CPU_FLAG_XOP 0x0400 /// Bulldozer XOP functions #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov diff --git a/libavutil/version.h b/libavutil/version.h index 13bb6f0..c3342cd 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -54,8 +54,8 @@ */ #define LIBAVUTIL_VERSION_MAJOR 54 -#define LIBAVUTIL_VERSION_MINOR 13 -#define LIBAVUTIL_VERSION_MICRO 1 +#define LIBAVUTIL_VERSION_MINOR 14 +#define LIBAVUTIL_VERSION_MICRO 0 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 8be6d94..098ccf7 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void) if (ext_caps (1 22)) rval |= AV_CPU_FLAG_MMXEXT; +if (!strncmp(vendor.c, AuthenticAMD, 12)) { /* Allow for selectively disabling SSE2 functions on AMD processors with SSE2 support but not SSE4a. This includes Athlon64, some Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster @@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void) AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case so that SSE2 is used unless explicitly disabled by checking AV_CPU_FLAG_SSE2SLOW. */ -if (!strncmp(vendor.c, AuthenticAMD, 12) -rval AV_CPU_FLAG_SSE2 !(ecx 0x0040)) { -rval |= AV_CPU_FLAG_SSE2SLOW; +if (rval AV_CPU_FLAG_SSE2 !(ecx 0x0040)) +rval |= AV_CPU_FLAG_SSE2SLOW; + +/* Similar to the above but for AVX functions on AMD processors. 
+ This is necessary only for functions using YMM registers on Bulldozer + based CPUs as they lack 256-bits execution units. SSE/AVX functions + using XMM registers are always faster on them. + AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so
[libav-devel] [PATCH v2 2/3] x86/cpu: add helper macros to check for slow cpuflags
Signed-off-by: James Almer jamr...@gmail.com --- libavutil/cpu_internal.h | 12 libavutil/x86/cpu.h | 18 ++ 2 files changed, 30 insertions(+) diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h index 3bfe8a8..2e9b44b 100644 --- a/libavutil/cpu_internal.h +++ b/libavutil/cpu_internal.h @@ -24,8 +24,20 @@ #define CPUEXT_SUFFIX(flags, suffix, cpuext)\ (HAVE_ ## cpuext ## suffix ((flags) AV_CPU_FLAG_ ## cpuext)) +#define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext) \ +(HAVE_ ## cpuext ## suffix ((flags) AV_CPU_FLAG_ ## cpuext) \ + !((flags) AV_CPU_FLAG_ ## cpuext ## SLOW)) + +#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext) \ +(HAVE_ ## cpuext ## suffix ((flags) AV_CPU_FLAG_ ## cpuext) \ + ((flags) AV_CPU_FLAG_ ## cpuext ## SLOW)) + #define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext) +#define CPUEXT_FAST(flags, cpuext) CPUEXT_SUFFIX_FAST(flags, , cpuext) + +#define CPUEXT_SLOW(flags, cpuext) CPUEXT_SUFFIX_SLOW(flags, , cpuext) + int ff_get_cpu_flags_aarch64(void); int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index 50da30e..0695436 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -32,11 +32,17 @@ #define X86_MMXEXT(flags) CPUEXT(flags, MMXEXT) #define X86_SSE(flags) CPUEXT(flags, SSE) #define X86_SSE2(flags) CPUEXT(flags, SSE2) +#define X86_SSE2_FAST(flags)CPUEXT_FAST(flags, SSE2) +#define X86_SSE2_SLOW(flags)CPUEXT_SLOW(flags, SSE2) #define X86_SSE3(flags) CPUEXT(flags, SSE3) +#define X86_SSE3_FAST(flags)CPUEXT_FAST(flags, SSE3) +#define X86_SSE3_SLOW(flags)CPUEXT_SLOW(flags, SSE3) #define X86_SSSE3(flags)CPUEXT(flags, SSSE3) #define X86_SSE4(flags) CPUEXT(flags, SSE4) #define X86_SSE42(flags)CPUEXT(flags, SSE42) #define X86_AVX(flags) CPUEXT(flags, AVX) +#define X86_AVX_FAST(flags) CPUEXT_FAST(flags, AVX) +#define X86_AVX_SLOW(flags) CPUEXT_SLOW(flags, AVX) #define X86_XOP(flags) CPUEXT(flags, XOP) #define X86_FMA3(flags) CPUEXT(flags, FMA3) #define 
X86_FMA4(flags) CPUEXT(flags, FMA4) @@ -48,11 +54,17 @@ #define EXTERNAL_MMXEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMXEXT) #define EXTERNAL_SSE(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE) #define EXTERNAL_SSE2(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE2) +#define EXTERNAL_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE2) +#define EXTERNAL_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE2) #define EXTERNAL_SSE3(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE3) +#define EXTERNAL_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE3) +#define EXTERNAL_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE3) #define EXTERNAL_SSSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSSE3) #define EXTERNAL_SSE4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4) #define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42) #define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX) +#define EXTERNAL_AVX_FAST(flags)CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, AVX) +#define EXTERNAL_AVX_SLOW(flags)CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX) #define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP) #define EXTERNAL_FMA3(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3) #define EXTERNAL_FMA4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4) @@ -64,11 +76,17 @@ #define INLINE_MMXEXT(flags)CPUEXT_SUFFIX(flags, _INLINE, MMXEXT) #define INLINE_SSE(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE) #define INLINE_SSE2(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE2) +#define INLINE_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE2) +#define INLINE_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE2) #define INLINE_SSE3(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE3) +#define INLINE_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE3) +#define INLINE_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE3) #define INLINE_SSSE3(flags) CPUEXT_SUFFIX(flags, _INLINE, SSSE3) #define INLINE_SSE4(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE4) #define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, 
_INLINE, SSE42) #define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX) +#define INLINE_AVX_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, AVX) +#define INLINE_AVX_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, AVX) #define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP) #define INLINE_FMA3(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA3) #define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4) -- 2.4.1
Re: [libav-devel] [PATCH 8/8] hevcdsp: add x86 SIMD for MC
On 19/08/15 4:43 PM, Anton Khirnov wrote: --- libavcodec/hevc.c | 6 +- libavcodec/hevc.h | 2 +- libavcodec/hevcdsp.c | 24 +- libavcodec/hevcdsp.h | 5 +- libavcodec/hevcdsp_template.c | 8 +- libavcodec/x86/Makefile | 3 +- libavcodec/x86/hevc_mc.asm| 816 ++ libavcodec/x86/hevcdsp_init.c | 405 + 8 files changed, 1258 insertions(+), 11 deletions(-) create mode 100644 libavcodec/x86/hevc_mc.asm I'm getting segmentation faults with quite a few samples. For example http://www.elecard.com/assets/files/other/clips/bbb_1080p_c.ts ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 8/8] hevcdsp: add x86 SIMD for MC
On 19/08/15 8:23 PM, Ronald S. Bultje wrote: Hi, On Wed, Aug 19, 2015 at 6:34 PM, James Almer jamr...@gmail.com wrote: On 19/08/15 4:43 PM, Anton Khirnov wrote: --- libavcodec/hevc.c | 6 +- libavcodec/hevc.h | 2 +- libavcodec/hevcdsp.c | 24 +- libavcodec/hevcdsp.h | 5 +- libavcodec/hevcdsp_template.c | 8 +- libavcodec/x86/Makefile | 3 +- libavcodec/x86/hevc_mc.asm| 816 ++ libavcodec/x86/hevcdsp_init.c | 405 + 8 files changed, 1258 insertions(+), 11 deletions(-) create mode 100644 libavcodec/x86/hevc_mc.asm I'm getting segmentation faults with quite a few of samples. For example http://www.elecard.com/assets/files/other/clips/bbb_1080p_c.ts So, at the risk of godwin, why was this reimplemented from scratch, rather than basing it on what ffmpeg has? How could this possibly be an advantage to our users? Or OpenHEVC for that matter, which is the source of almost every hevc asm optimization, x86 or otherwise, and a project that afaik branched off libav. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] h264: Fix faulty call to avpriv_request_sample
On 21/08/15 8:43 PM, Luca Barbato wrote: Broken in f9ab4fe1f7c1e9d410ca5ee2c9ff8d2892aad068 --- Sorry. libavcodec/h264_sei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c index 361d4de..ddf1b6f 100644 --- a/libavcodec/h264_sei.c +++ b/libavcodec/h264_sei.c @@ -171,7 +171,7 @@ static int decode_registered_user_data_closed_caption(H264Context *h, int size) } } else { int i; -avpriv_request_sample(Subtitles with data type 0x%02x, +avpriv_request_sample(h-avctx, Subtitles with data type 0x%02x, user_data_type_code); for (i = 0; i size - 1; i++) skip_bits(h-gb, 8); -- 2.5.0 Looks good. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC
On 21/08/15 4:19 AM, Anton Khirnov wrote: + +add dstq, dststrideq +add srcq, srcstrideq + +%assign i (i + 1) +%endrep + +dec heightq This and every other case should be heightd. There's no guarantee the high bits will be zero on every x86_64 target. This is the source of the crashes I was getting. +jg .loop +RET +%endmacro + +INIT_XMM sse2 +GET_PIXELS 4, 8, 1 +GET_PIXELS 8, 8, 1 +GET_PIXELS 12, 8, 3 +GET_PIXELS 16, 8, 2 +GET_PIXELS 24, 8, 3 +GET_PIXELS 32, 8, 3 +GET_PIXELS 48, 8, 3 +GET_PIXELS 64, 8, 3 + +GET_PIXELS 4, 10, 1 +GET_PIXELS 8, 10, 1 +GET_PIXELS 12, 10, 3 +GET_PIXELS 16, 10, 2 +GET_PIXELS 24, 10, 3 +GET_PIXELS 32, 10, 3 +GET_PIXELS 48, 10, 3 +GET_PIXELS 64, 10, 3 + +; hevc_qpel_h/v_w_8(int16_t *dst, ptrdiff_t dststride, +; uint8_t *src, ptrdiff_t srcstride, +; int height, int mx, int my, int *mcbuffer) + +; 8-bit qpel interpolation +; %1: block width +; %2: 0 - horizontal; 1 - vertical +%macro QPEL_8 2 +%if %2 +%define postfixv +%define mvfrac myq Same here and below the else, rename this to mvfracq and add a mvfracd. +%define pixstride srcstrideq +%define pixstride3 sstride3q +%define src_m3 srcm3q +%else +%define postfixh +%define mvfrac mxq +%define pixstride 1 +%define pixstride3 3 +%define src_m3 (srcq - 3) +%endif + +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, dststride, src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg +%if %2 +and mvfrac, 0x3 +%endif +dec mvfrac +shl mvfrac, 4 Use mvfracd on these three; it will clear the high bits for the mova below. +lea coeffsregq, [hevc_qpel_coeffs8] +mova m0, [coeffsregq + mvfrac] Then use mvfracq here. Replicate this on every function, of course.
+ +%macro PUT_WEIGHTED_PRED 3 +%if %1 +cglobal hevc_put_weighted_pred_avg_ %+ %2 %+ _ %+ %3, 11, 11, 8, denom, weight0, weight1, offset0, offset1, dst, dststride, src0, src1, srcstride, height +%else +cglobal hevc_put_weighted_pred_ %+ %2 %+ _ %+ %3, 8, 8, 8, denom, weight0, offset0, dst, dststride, src0, srcstride, height +%endif +and heightq,0x7fff You should be able to remove this after the above changes. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [libav-commits] h264: Discard currently unsupported registered sei
Module: libav Branch: master Commit: f9ab4fe1f7c1e9d410ca5ee2c9ff8d2892aad068 Author: John Högberg john.hogberg at ericsson.com Committer: Luca Barbato lu_zero at gentoo.org Date: Fri Aug 7 19:30:38 2015 + h264: Discard currently unsupported registered sei Signed-off-by: Luca Barbato lu_zero at gentoo.org --- libavcodec/h264_sei.c |6 ++ 1 file changed, 6 insertions(+) diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c index 8b07682..361d4de 100644 --- a/libavcodec/h264_sei.c +++ b/libavcodec/h264_sei.c @@ -169,6 +169,12 @@ static int decode_registered_user_data_closed_caption(H264Context *h, int size) skip_bits(&h->gb, 8); // marker_bits } } +} else { +int i; +avpriv_request_sample("Subtitles with data type 0x%02x", This should be avpriv_request_sample(h->avctx, "Subtitles with data type 0x%02x", Or similar. FATE is almost 50% red by now because of this... ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC
On 21/08/15 4:19 AM, Anton Khirnov wrote: +%macro PUT_WEIGHTED_PRED 3 +%if %1 +cglobal hevc_put_weighted_pred_avg_ %+ %2 %+ _ %+ %3, 11, 11, 8, denom, weight0, weight1, offset0, offset1, dst, dststride, src0, src1, srcstride, height +%else +cglobal hevc_put_weighted_pred_ %+ %2 %+ _ %+ %3, 8, 8, 8, denom, weight0, offset0, dst, dststride, src0, srcstride, height +%endif +and heightq,0x7fff + +add denomq, 14 + %1 - %3 +movq m0, denomq denom is a uint8_t. This should be add denomd, 14 + %1 - %3 movd m0, denomd I don't think doing a movzx denomd, denomb to clear bits 9 to 31 is necessary, so the above should suffice. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC
On 22/08/15 1:16 PM, Anton Khirnov wrote: +%macro QPEL_8 2 +%if %2 +%define postfixv +%define mvfrac myq Same here and below the else, rename this to mvfracq and add a mvfracd. +%define pixstride srcstrideq +%define pixstride3 sstride3q +%define src_m3 srcm3q +%else +%define postfixh +%define mvfrac mxq +%define pixstride 1 +%define pixstride3 3 +%define src_m3 (srcq - 3) +%endif + +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, dststride, src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg This should be 7, 10, 7, Otherwise you're loading sstride3 from stack as if it were a function argument. Ideally though, for vertical you'd use 5, 9, 7 then manually load either mx or my instead of both, saving one register, or even 5, 8, 7, since coeffsreg and mvfrac are only used during init, and you can easily reuse one of those two registers for sstride3 or srcm3. You can also push it down to 4, 7, 7 if you manually load height before or after the SPLATWs and reuse the regs for coeffsreg and mvfrac. As a plus, this would make the functions work with x86_32. For horizontal you don't even need sstride3 or srcm3, so you definitely should declare and use less registers. Didn't check other functions but I'm sure similar optimizations can be done. +%if %2 +and mvfrac, 0x3 +%endif +dec mvfrac +shl mvfrac, 4 Use mvfracd on these three, it will clear the high bits for the mova below. anding the whole register with 3/7 should also work fine, with less clutter. and mvfrac, 0x3 is only in ff_hevc_qpel_v_* functions, but not ff_hevc_qpel_h_*. It's the same with the and mvfrac, 0x7 cases below. You need to use the d suffix instead of q on the register names to make sure the high bits are cleared. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC
On 23/08/15 3:27 PM, Anton Khirnov wrote: Quoting James Almer (2015-08-22 23:58:41) On 22/08/15 1:16 PM, Anton Khirnov wrote: +%macro QPEL_8 2 +%if %2 +%define postfixv +%define mvfrac myq Same here and below the else, rename this to mvfracq and add a mvfracd. +%define pixstride srcstrideq +%define pixstride3 sstride3q +%define src_m3 srcm3q +%else +%define postfixh +%define mvfrac mxq +%define pixstride 1 +%define pixstride3 3 +%define src_m3 (srcq - 3) +%endif + +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, dststride, src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg This should be 7, 10, 7, Otherwise you're loading sstride3 from stack as if it were a function argument. Ideally though, for vertical you'd use 5, 9, 7 then manually load either mx or my instead of both, saving one register, or even 5, 8, 7, since coeffsreg and mvfrac are only used during init, and you can easily reuse one of those two registers for sstride3 or srcm3. You can also push it down to 4, 7, 7 if you manually load height before or after the SPLATWs and reuse the regs for coeffsreg and mvfrac. As a plus, this would make the functions work with x86_32. For horizontal you don't even need sstride3 or srcm3, so you definitely should declare and use less registers. Didn't check other functions but I'm sure similar optimizations can be done. +%if %2 +and mvfrac, 0x3 +%endif +dec mvfrac +shl mvfrac, 4 Use mvfracd on these three, it will clear the high bits for the mova below. anding the whole register with 3/7 should also work fine, with less clutter. and mvfrac, 0x3 is only in ff_hevc_qpel_v_* functions, but not ff_hevc_qpel_h_*. It's the same with the and mvfrac, 0x7 cases below. Sure, I meant to change the code so it's done in both paths. It's not necessary. Just use the 32bit gprs. You need to use the d suffix instead of q on the register names to make sure the high bits are cleared. Eh? 
Perhaps I'm misunderstanding something, but I'd expect that using d here would do exactly the opposite and keep the random data in the high bits. No, using d to write a GPR on x86_64 will clear the high bits (32 to 63) in a similar way that using VEX-coded instructions to write xmm registers will clear bits 128 to 255 on ymm registers. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 8/8] hevcdsp: add x86 SIMD for MC
On 20/08/15 3:35 AM, Anton Khirnov wrote: Quoting James Almer (2015-08-20 00:34:58) On 19/08/15 4:43 PM, Anton Khirnov wrote: --- libavcodec/hevc.c | 6 +- libavcodec/hevc.h | 2 +- libavcodec/hevcdsp.c | 24 +- libavcodec/hevcdsp.h | 5 +- libavcodec/hevcdsp_template.c | 8 +- libavcodec/x86/Makefile | 3 +- libavcodec/x86/hevc_mc.asm| 816 ++ libavcodec/x86/hevcdsp_init.c | 405 + 8 files changed, 1258 insertions(+), 11 deletions(-) create mode 100644 libavcodec/x86/hevc_mc.asm I'm getting segmentation faults with quite a few of samples. For example http://www.elecard.com/assets/files/other/clips/bbb_1080p_c.ts Cannot reproduce here. Can you give me more details (system, where exactly does it crash, etc.)? Mingw-w64 GCC 5.2.0. It also crashes with checkasm after patch 7/8, but in a different place. With checkasm i get: Program received signal SIGSEGV, Segmentation fault. 0x0046ae00 in put_hevc_qpel_pixels_4_8 () at /home/jamrial/libav/libavcodec/hevcdsp_template.c:41 41 } (gdb) disass $pc-32,$pc+32 Dump of assembler code from 0x46ade0 to 0x46ae20: 0x0046ade0 put_hevc_qpel_pixels_4_8+0: mov0x28(%rsp),%r11d 0x0046ade5 put_hevc_qpel_pixels_4_8+5: xor%r10d,%r10d 0x0046ade8 put_hevc_qpel_pixels_4_8+8: and $0xfffe,%rdx 0x0046adec put_hevc_qpel_pixels_4_8+12:test %r11d,%r11d 0x0046adef put_hevc_qpel_pixels_4_8+15:jle0x46ae3c put_hevc_qpel_pixels_4_8+92 0x0046adf1 put_hevc_qpel_pixels_4_8+17:nopl 0x0(%rax,%rax,1) 0x0046adf6 put_hevc_qpel_pixels_4_8+22:nopw %cs:0x0(%rax,%rax,1) = 0x0046ae00 put_hevc_qpel_pixels_4_8+32:movzbl (%r8),%eax 0x0046ae04 put_hevc_qpel_pixels_4_8+36:inc%r10d 0x0046ae07 put_hevc_qpel_pixels_4_8+39:shl$0x6,%eax 0x0046ae0a put_hevc_qpel_pixels_4_8+42:mov%ax,(%rcx) 0x0046ae0d put_hevc_qpel_pixels_4_8+45:movzbl 0x1(%r8),%eax 0x0046ae12 put_hevc_qpel_pixels_4_8+50:shl$0x6,%eax 0x0046ae15 put_hevc_qpel_pixels_4_8+53:mov%ax,0x2(%rcx) 0x0046ae19 put_hevc_qpel_pixels_4_8+57:movzbl 0x2(%r8),%eax 0x0046ae1e put_hevc_qpel_pixels_4_8+62:shl$0x6,%eax End of assembler 
dump. (gdb) info all-registers rax0x2c80 11392 rbx0xed56bb2dcb3c7736 -1344681633365854410 rcx0xdeadbeef00224c50 -2401053092609897392 rdx0xdeadbeef0020 -2401053092612145120 rsi0x75b6ba21077c48ad 8482171599221180589 rdi0x21f86d66c8ca00ce 2447826685698638030 rbp0x8bda43d3fd1a7e06 0x8bda43d3fd1a7e06 rsp0x2192b8 0x2192b8 r8 0xdeadbeef00219af3 -2401053092609942797 r9 0xdeadbeef0010 -2401053092612145136 r100x1 1 r110x10 16 r120xb64a9c9e5d318408 -5311260606547786744 r130xdf9a54b303f1d3a3 -2334460328996121693 r140x4a75479abd64e097 5365273261009854615 r150x249214109d5d1c88 2635190793557318792 rip0x46ae00 0x46ae00 put_hevc_qpel_pixels_4_8+32 With bbb_1080p_c.ts i get: Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 1044.0x6ac] 0x00a91391 in ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop () (gdb) disass $pc-32,$pc+32 Dump of assembler code from 0xa91371 to 0xa913b1: 0x00a91371 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+31: pop%rax 0x00a91372 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+32: adc%ah,0x41(%rsi) 0x00a91375 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+35: paddsw 0x10(%rcx),%mm3 0x00a91379 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+39: paddsw %xmm0,%xmm3 0x00a9137d ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+43: psraw $0x7,%xmm3 0x00a91382 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+48: packuswb %xmm3,%xmm3 0x00a91386 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+52: movq %xmm3,0x8(%rcx) 0x00a9138b ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+57: movdqa 0x20(%r8),%xmm3 = 0x00a91391 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+63: paddsw 0x20(%r9),%xmm3 0x00a91397 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+69: paddsw %xmm0,%xmm3 0x00a9139b ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+73: psraw $0x7,%xmm3 0x00a913a0 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+78: packuswb %xmm3,%xmm3 0x00a913a4 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+82: movq %xmm3,0x10(%rcx) 0x00a913a9
[libav-devel] [PATCH] rtmpproto: free hmac context properly
Signed-off-by: James Almer jamr...@gmail.com --- libavformat/rtmpproto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 1db7495..ec4b0e7 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -971,7 +971,7 @@ int ff_rtmp_calc_digest(const uint8_t *src, int len, int gap, } av_hmac_final(hmac, dst, 32); -av_free(hmac); +av_hmac_free(hmac); return 0; } -- 2.5.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel