from:"James Almer"

[libav-devel] [PATCH 1/3] x86: add missing XOP checks and macros

2014-02-21 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 configure   | 5 +
 libavutil/x86/cpu.h | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/configure b/configure
index 72cf831..96cbe5d 100755
--- a/configure
+++ b/configure
@@ -270,6 +270,7 @@ Optimization options (experts only):
   --disable-sse4   disable SSE4 optimizations
   --disable-sse42  disable SSE4.2 optimizations
   --disable-avxdisable AVX optimizations
+  --disable-xopdisable XOP optimizations
   --disable-fma4   disable FMA4 optimizations
   --disable-avx2   disable AVX2 optimizations
   --disable-armv5tedisable armv5te optimizations
@@ -1252,6 +1253,7 @@ ARCH_EXT_LIST_X86='
 avx
 avx2
 cpunop
+xop
 fma4
 i686
 mmx
@@ -1575,6 +1577,7 @@ ssse3_deps=sse3
 sse4_deps=ssse3
 sse42_deps=sse4
 avx_deps=sse42
+xop_deps=avx
 fma4_deps=avx
 avx2_deps=avx
 
@@ -3757,6 +3760,7 @@ EOF
 
 check_yasm movbe ecx, [5]  enable yasm ||
 die yasm/nasm not found or too old. Use --disable-yasm for a 
crippled build.
+check_yasm vpmacsdd xmm0, xmm1, xmm2, xmm3 || disable xop_external
 check_yasm vfmaddps ymm0, ymm1, ymm2, ymm3 || disable fma4_external
 check_yasm CPU amdnop  enable cpunop
 fi
@@ -4289,6 +4293,7 @@ if enabled x86; then
 echo SSE enabled   ${sse-no}
 echo SSSE3 enabled ${ssse3-no}
 echo AVX enabled   ${avx-no}
+echo XOP enabled   ${xop-no}
 echo FMA4 enabled  ${fma4-no}
 echo i686 features enabled ${i686-no}
 echo CMOV is fast  ${fast_cmov-no}
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 5303c5a..40daf44 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -37,6 +37,7 @@
 #define X86_SSE4(flags) CPUEXT(flags, SSE4)
 #define X86_SSE42(flags)CPUEXT(flags, SSE42)
 #define X86_AVX(flags)  CPUEXT(flags, AVX)
+#define X86_XOP(flags)  CPUEXT(flags, XOP)
 #define X86_FMA4(flags) CPUEXT(flags, FMA4)
 #define X86_AVX2(flags) CPUEXT(flags, AVX2)
 
@@ -51,6 +52,7 @@
 #define EXTERNAL_SSE4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4)
 #define EXTERNAL_SSE42(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
 #define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
+#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
 #define EXTERNAL_FMA4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
 #define EXTERNAL_AVX2(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
 
@@ -65,6 +67,7 @@
 #define INLINE_SSE4(flags)  CPUEXT_SUFFIX(flags, _INLINE, SSE4)
 #define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42)
 #define INLINE_AVX(flags)   CPUEXT_SUFFIX(flags, _INLINE, AVX)
+#define INLINE_XOP(flags)   CPUEXT_SUFFIX(flags, _INLINE, XOP)
 #define INLINE_FMA4(flags)  CPUEXT_SUFFIX(flags, _INLINE, FMA4)
 #define INLINE_AVX2(flags)  CPUEXT_SUFFIX(flags, _INLINE, AVX2)
 
-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 3/3] x86: add detection for Bit Manipulation Instruction sets

2014-02-21 Thread James Almer

Based on x264 code

Signed-off-by: James Almer jamr...@gmail.com
---
 libavutil/cpu.c |  6 ++
 libavutil/cpu.h |  2 ++
 libavutil/x86/cpu.c | 16 +++-
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 972e4eb..d651eb2 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -90,6 +90,8 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
 #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
 #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
+#define CPUFLAG_BMI1 (AV_CPU_FLAG_BMI1)
+#define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | CPUFLAG_BMI1)
 static const AVOption cpuflags_opts[] = {
 { flags   , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, 
INT64_MAX, .unit = flags },
 #if   ARCH_PPC
@@ -111,6 +113,8 @@ int av_parse_cpu_flags(const char *s)
 { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3
 },.unit = flags },
 { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4
 },.unit = flags },
 { avx2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2
 },.unit = flags },
+{ bmi1, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI1
 },.unit = flags },
+{ bmi2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI2
 },.unit = flags },
 { 3dnow   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW   
 },.unit = flags },
 { 3dnowext, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT
 },.unit = flags },
 { cmov, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV
 },.unit = flags },
@@ -212,6 +216,8 @@ static const struct {
 { AV_CPU_FLAG_3DNOWEXT,  3dnowext   },
 { AV_CPU_FLAG_CMOV,  cmov   },
 { AV_CPU_FLAG_AVX2,  avx2   },
+{ AV_CPU_FLAG_BMI1,  bmi1   },
+{ AV_CPU_FLAG_BMI2,  bmi2   },
 #endif
 { 0 }
 };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 934b3be..517c520 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -50,6 +50,8 @@
 #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov
 #define AV_CPU_FLAG_AVX2 0x8000 /// AVX2 functions: requires OS 
support even if YMM registers aren't used
 #define AV_CPU_FLAG_FMA30x1 /// Haswell FMA3 functions
+#define AV_CPU_FLAG_BMI10x2 /// Bit Manipulation Instruction Set 1
+#define AV_CPU_FLAG_BMI20x4 /// Bit Manipulation Instruction Set 2
 
 #define AV_CPU_FLAG_ALTIVEC  0x0001 /// standard
 
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index bf5e9fc..4c96c27 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -137,16 +137,22 @@ int ff_get_cpu_flags_x86(void)
 rval |= AV_CPU_FLAG_FMA3;
 }
 }
-#if HAVE_AVX2
+#endif /* HAVE_AVX */
+#endif /* HAVE_SSE */
 if (max_std_level = 7) {
 cpuid(7, eax, ebx, ecx, edx);
+#if HAVE_AVX2
 if (ebx0x0020)
 rval |= AV_CPU_FLAG_AVX2;
-/* TODO: BMI1/2 */
-}
 #endif /* HAVE_AVX2 */
-#endif /* HAVE_AVX */
-#endif /* HAVE_SSE */
+/* BMI1/2 don't need OS support */
+if (ebx0x0008)
+{
+rval |= AV_CPU_FLAG_BMI1;
+if (ebx0x0100)
+rval |= AV_CPU_FLAG_BMI2;
+}
+}
 }
 
 cpuid(0x8000, max_ext_level, ebx, ecx, edx);
-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/3] x86: add detection for FMA3 instruction set

2014-02-21 Thread James Almer

Based on x264 code

Signed-off-by: James Almer jamr...@gmail.com
---
 configure   | 5 +
 libavutil/cpu.c | 3 +++
 libavutil/cpu.h | 1 +
 libavutil/x86/cpu.c | 5 -
 libavutil/x86/cpu.h | 3 +++
 5 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 96cbe5d..82077b5 100755
--- a/configure
+++ b/configure
@@ -271,6 +271,7 @@ Optimization options (experts only):
   --disable-sse42  disable SSE4.2 optimizations
   --disable-avxdisable AVX optimizations
   --disable-xopdisable XOP optimizations
+  --disable-fma3   disable FMA3 optimizations
   --disable-fma4   disable FMA4 optimizations
   --disable-avx2   disable AVX2 optimizations
   --disable-armv5tedisable armv5te optimizations
@@ -1254,6 +1255,7 @@ ARCH_EXT_LIST_X86='
 avx2
 cpunop
 xop
+fma3
 fma4
 i686
 mmx
@@ -1578,6 +1580,7 @@ sse4_deps=ssse3
 sse42_deps=sse4
 avx_deps=sse42
 xop_deps=avx
+fma3_deps=avx
 fma4_deps=avx
 avx2_deps=avx
 
@@ -3761,6 +3764,7 @@ EOF
 check_yasm movbe ecx, [5]  enable yasm ||
 die yasm/nasm not found or too old. Use --disable-yasm for a 
crippled build.
 check_yasm vpmacsdd xmm0, xmm1, xmm2, xmm3 || disable xop_external
+check_yasm vfmadd132ps ymm0, ymm1, ymm2|| disable fma3_external
 check_yasm vfmaddps ymm0, ymm1, ymm2, ymm3 || disable fma4_external
 check_yasm CPU amdnop  enable cpunop
 fi
@@ -4294,6 +4298,7 @@ if enabled x86; then
 echo SSSE3 enabled ${ssse3-no}
 echo AVX enabled   ${avx-no}
 echo XOP enabled   ${xop-no}
+echo FMA3 enabled  ${fma3-no}
 echo FMA4 enabled  ${fma4-no}
 echo i686 features enabled ${i686-no}
 echo CMOV is fast  ${fast_cmov-no}
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 8c2cfb8..972e4eb 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -87,6 +87,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4)
 #define CPUFLAG_AVX  (AV_CPU_FLAG_AVX  | CPUFLAG_SSE42)
 #define CPUFLAG_XOP  (AV_CPU_FLAG_XOP  | CPUFLAG_AVX)
+#define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
 #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
 #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
 static const AVOption cpuflags_opts[] = {
@@ -107,6 +108,7 @@ int av_parse_cpu_flags(const char *s)
 { sse4.2  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42   
 },.unit = flags },
 { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX 
 },.unit = flags },
 { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP 
 },.unit = flags },
+{ fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3
 },.unit = flags },
 { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4
 },.unit = flags },
 { avx2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2
 },.unit = flags },
 { 3dnow   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW   
 },.unit = flags },
@@ -204,6 +206,7 @@ static const struct {
 { AV_CPU_FLAG_SSE42, sse4.2 },
 { AV_CPU_FLAG_AVX,   avx},
 { AV_CPU_FLAG_XOP,   xop},
+{ AV_CPU_FLAG_FMA3,  fma3   },
 { AV_CPU_FLAG_FMA4,  fma4   },
 { AV_CPU_FLAG_3DNOW, 3dnow  },
 { AV_CPU_FLAG_3DNOWEXT,  3dnowext   },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 29036e3..934b3be 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -49,6 +49,7 @@
 #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions
 #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov
 #define AV_CPU_FLAG_AVX2 0x8000 /// AVX2 functions: requires OS 
support even if YMM registers aren't used
+#define AV_CPU_FLAG_FMA30x1 /// Haswell FMA3 functions
 
 #define AV_CPU_FLAG_ALTIVEC  0x0001 /// standard
 
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 0e06d5d..bf5e9fc 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -131,8 +131,11 @@ int ff_get_cpu_flags_x86(void)
 if ((ecx  0x1800) == 0x1800) {
 /* Check for OS support */
 xgetbv(0, eax, edx);
-if ((eax  0x6) == 0x6)
+if ((eax  0x6) == 0x6) {
 rval |= AV_CPU_FLAG_AVX;
+if (ecx0x1000)
+rval |= AV_CPU_FLAG_FMA3;
+}
 }
 #if HAVE_AVX2
 if (max_std_level = 7) {
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 40daf44..50da30e 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -38,6 +38,7 @@
 #define X86_SSE42(flags)CPUEXT(flags, SSE42)
 #define X86_AVX(flags)  CPUEXT(flags, AVX)
 #define X86_XOP(flags

[libav-devel] [PATCH 0/3] support for FMA3 and BMI instruction sets

2014-02-21 Thread James Almer

These are the missing instruction sets introduced with Haswell/Piledriver CPUs.

Last two patches are based on x264 detection code.

James Almer (3):
  x86: add missing XOP checks and macros
  x86: add detection for FMA3 instruction set
  x86: add detection for Bit Manipulation Instruction sets

 configure   | 10 ++
 libavutil/cpu.c |  9 +
 libavutil/cpu.h |  3 +++
 libavutil/x86/cpu.c | 21 +++--
 libavutil/x86/cpu.h |  6 ++
 5 files changed, 43 insertions(+), 6 deletions(-)

-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 0/3] support for FMA3 and BMI instruction sets

2014-02-22 Thread James Almer

On 22/02/14 1:29 PM, Luca Barbato wrote:
 On 22/02/14 06:53, James Almer wrote:
 These are the missing instruction sets introduced with Haswell/Piledriver 
 CPUs.

 Last two patches are based on x264 detection code.
 
 The set doesn't look bad at all but I'm wondering about its usage:
 
 James Almer (3):
   x86: add missing XOP checks and macros
   x86: add detection for FMA3 instruction set
 
 What is the relationship between FMA3 and FMA4?
 

It's what happens when Intel and AMD don't talk to each other to coordinate 
stuff.
Short story summary here: https://en.wikipedia.org/wiki/FMA_instruction_set

Both sets do the same in essence. The only difference is that one uses three 
operands 
while the other uses four (Technically, FMA4 is the most flexible, but it's 
only 
supported by AMD).
AMD added FMA4 starting with Bulldozer, then added FMA3 as well to Piledriver 
for 
compatibility reasons once Intel revealed they would use that starting with 
Haswell.

   x86: add detection for Bit Manipulation Instruction sets
 
 Is BMI a subset of AVX2?

No, they are independent and work on general registers.
AMD added BMI1 starting with Piledriver, which doesn't support AVX2, whereas 
Intel 
added both BMI1 and BMI2 starting with Haswell.

 
 lu
 ___
 libav-devel mailing list
 libav-devel@libav.org
 https://lists.libav.org/mailman/listinfo/libav-devel
 

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 4/5] configure: Move cpunop into ARCH_EXT_LIST_X86

2014-02-22 Thread James Almer

On 22/02/14 11:57 PM, Luca Barbato wrote:
 On 23/02/14 00:52, Dave Yeo wrote:
 
 HAVE_LIST has ARCH_EXT_LIST
 
 ARCH_EXT_LIST has ARCH_EXT_LIST_X86
 
 I'm wondering why it is broken for you since it should not.
 
 lu

https://fate.libav.org/x86.os2.444/2014002516

Probably related.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 3/3] x86: add detection for Bit Manipulation Instruction sets

2014-02-23 Thread James Almer

On 23/02/14 11:20 AM, Janne Grunau wrote:
 Do you plan to write assembly using any of these instructions? Having
 the tests while not using the instructions is just an exercise in
 completeness.
 
 Janne

No, not for BMI1/2. I saw the TODO line as I was adding FMA3 so I 
thought I might as well get that out of the way while at it.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/3] x86/synth_filter: add synth_filter_sse

2014-03-03 Thread James Almer

Build only on x86_32 targets.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 55 +---
 libavcodec/x86/dcadsp_init.c | 44 +--
 2 files changed, 69 insertions(+), 30 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 56039ba..970ec3d 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -199,15 +199,31 @@ INIT_XMM sse
 DCA_LFE_FIR 0
 DCA_LFE_FIR 1
 
-INIT_XMM sse2
+%macro SETZERO 1
+%if cpuflag(sse2)
+pxor  %1, %1
+%else
+xorps %1, %1, %1
+%endif
+%endmacro
+
+%macro SHUF 2
+%if cpuflag(sse2)
+pshufd%1, %2, q0123
+%else
+mova  %1, %2
+shufps%1, %1, q0123
+%endif
+%endmacro
+
 %macro INNER_LOOP   1
 ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
 ;~ a += window[i + j]  * (-synth_buf[15 - i + j])
 ;~ b += window[i + j + 16] * (synth_buf[i + j])
-pshufdm5, [ptr2 + j + (15 - 3) * 4], q0123
+SHUF  m5, [ptr2 + j + (15 - 3) * 4]
 mova  m6, [ptr1 + j]
 %if ARCH_X86_64
-pshufd   m11, [ptr2 + j + (15 - 3) * 4 - mmsize], q0123
+SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize]
 mova m12, [ptr1 + j + mmsize]
 %endif
 mulps m6, [win  + %1 + j + 16 * 4]
@@ -224,10 +240,10 @@ INIT_XMM sse2
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
 ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-pshufdm6, [ptr2 + j + (31 - 3) * 4], q0123
+SHUF  m6, [ptr2 + j + (31 - 3) * 4]
 mova  m5, [ptr1 + j + 16 * 4]
 %if ARCH_X86_64
-pshufd   m12, [ptr2 + j + (31 - 3) * 4 - mmsize], q0123
+SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize]
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
 mulps m5, [win  + %1 + j + 32 * 4]
@@ -245,20 +261,25 @@ INIT_XMM sse2
 subj, 64 * 4
 %endmacro
 
-; void ff_synth_filter_inner_sse2(float *synth_buf, float synth_buf2[32],
-; const float window[512], float out[32],
-; intptr_t offset, float scale)
+; void ff_synth_filter_inner_opt(float *synth_buf, float synth_buf2[32],
+;  const float window[512], float out[32],
+;  intptr_t offset, float scale)
+%macro SYNTH_FILTER 0
 cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
   synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
+%if cpuflag(sse2)
 movd   scale, scalem
+%else
+movss  scale, scalem
+%endif
 ; Make sure offset is in a register and not on the stack
 %define OFFQ  r4q
 %else
 %define OFFQ  offq
 %endif
-pshufdm0, m0, 0
+SPLATDm0
 ; prepare inner counter limit 1
 mov  r5q, 480
 sub  r5q, offmp
@@ -274,8 +295,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * 
ARCH_X86_64, \
 %endif
 .mainloop
 ; m1 = a  m2 = b  m3 = c  m4 = d
-pxor  m3, m3
-pxor  m4, m4
+SETZERO   m3
+SETZERO   m4
 mova  m1, [buf2 + i]
 mova  m2, [buf2 + i + 16 * 4]
 %if ARCH_X86_32
@@ -292,8 +313,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * 
ARCH_X86_64, \
 %define ptr2 r7q ; must be loaded
 %define win  r8q
 %define jr9q
-pxor  m9, m9
-pxor m10, m10
+SETZERO   m9
+SETZERO  m10
 mova  m7, [buf2 + i + mmsize]
 mova  m8, [buf2 + i + mmsize + 16 * 4]
 lea  win, [windowq + i]
@@ -350,3 +371,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
 subi, (ARCH_X86_64 + 1) * mmsize
 jge.mainloop
 RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_XMM sse
+SYNTH_FILTER
+%endif
+INIT_XMM sse2
+SYNTH_FILTER
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 3821892..f8dd9b1 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -56,29 +56,39 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 }
 }
 
-void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32],
-const float window[512],
-float out[32], intptr_t offset, float scale);
+#define SYNTH_FILTER_FUNC(opt) 
\
+void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   
\
+ const float window[512],  
\
+ float out[32], intptr_t offset, float scale); 
\
+static void synth_filter_##opt(FFTContext *imdct,  
\
+   float *synth_buf_ptr, int

[libav-devel] [PATCH 0/3] synth filter float ASM

2014-03-03 Thread James Almer

Here are some extra implementations that extend Christophe's work.

The first one (SSE) is only for x86_32 targets as x86_64 guarantees SSE2 is 
available.

Second patch is an AVX implementation using ymm registers.
In my tests it was about 30 cycles faster than SSE2 on a Sandy Bridge CPU.

I don't have proper numbers for the third patch since I could only test on an 
AMD 
rig, where functions using ymm registers tend to have subpar performance.
It still beat the AVX version by a decent margin, though, so Haswell should 
see 
a nice boost with it.

I could add an FMA4 version using xmm registers, which would benefit AMD users 
unlike these AVX/FMA3 ymm ones. Thoughts?

James Almer (3):
  x86/synth_filter: add synth_filter_fma3
  x86/synth_filter: add synth_filter_sse
  x86/synth_filter: add synth_filter_avx

 libavcodec/x86/dcadsp.asm| 109 ---
 libavcodec/x86/dcadsp_init.c |  52 ++---
 2 files changed, 107 insertions(+), 54 deletions(-)

-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/3] x86/synth_filter: add synth_filter_avx

2014-03-03 Thread James Almer

Sandy Bridge Win64:
180 cycles in ff_synth_filter_inner_sse2
150 cycles in ff_synth_filter_inner_avx

Also switch to a three operand format for some instructions to avoid 
assembly errors with Yasm 1.1.0 or older.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 76 +---
 libavcodec/x86/dcadsp_init.c |  4 +++
 2 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 970ec3d..0d7c86e 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -200,18 +200,22 @@ DCA_LFE_FIR 0
 DCA_LFE_FIR 1
 
 %macro SETZERO 1
-%if cpuflag(sse2)
+%if cpuflag(sse2)  notcpuflag(avx)
 pxor  %1, %1
 %else
 xorps %1, %1, %1
 %endif
 %endmacro
 
-%macro SHUF 2
-%if cpuflag(sse2)
-pshufd%1, %2, q0123
+%macro SHUF 3
+%if cpuflag(avx)
+mova  %3, [%2 - 16]
+vperm2f128%1, %3, %3, 1
+vshufps   %1, %1, %1, q0123
+%elif cpuflag(sse2)
+pshufd%1, [%2], q0123
 %else
-mova  %1, %2
+mova  %1, [%2]
 shufps%1, %1, q0123
 %endif
 %endmacro
@@ -220,43 +224,43 @@ DCA_LFE_FIR 1
 ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
 ;~ a += window[i + j]  * (-synth_buf[15 - i + j])
 ;~ b += window[i + j + 16] * (synth_buf[i + j])
-SHUF  m5, [ptr2 + j + (15 - 3) * 4]
+SHUF  m5,  ptr2 + j + (15 - 3) * 4, m6
 mova  m6, [ptr1 + j]
 %if ARCH_X86_64
-SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize]
+SHUF m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
 mova m12, [ptr1 + j + mmsize]
 %endif
-mulps m6, [win  + %1 + j + 16 * 4]
-mulps m5, [win  + %1 + j]
+mulps m6, m6,  [win + %1 + j + 16 * 4]
+mulps m5, m5,  [win + %1 + j]
 %if ARCH_X86_64
-mulpsm12, [win  + %1 + j + mmsize + 16 * 4]
-mulpsm11, [win  + %1 + j + mmsize]
+mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4]
+mulpsm11, m11, [win + %1 + j + mmsize]
 %endif
-addps m2, m6
-subps m1, m5
+addps m2, m2, m6
+subps m1, m1, m5
 %if ARCH_X86_64
-addps m8, m12
-subps m7, m11
+addps m8, m8, m12
+subps m7, m7, m11
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
 ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-SHUF  m6, [ptr2 + j + (31 - 3) * 4]
+SHUF  m6,  ptr2 + j + (31 - 3) * 4, m5
 mova  m5, [ptr1 + j + 16 * 4]
 %if ARCH_X86_64
-SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize]
+SHUF m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
-mulps m5, [win  + %1 + j + 32 * 4]
-mulps m6, [win  + %1 + j + 48 * 4]
+mulps m5, m5,  [win + %1 + j + 32 * 4]
+mulps m6, m6,  [win + %1 + j + 48 * 4]
 %if ARCH_X86_64
-mulpsm11, [win  + %1 + j + mmsize + 32 * 4]
-mulpsm12, [win  + %1 + j + mmsize + 48 * 4]
+mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4]
+mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4]
 %endif
-addps m3, m5
-addps m4, m6
+addps m3, m3, m5
+addps m4, m4, m6
 %if ARCH_X86_64
-addps m9, m11
-addpsm10, m12
+addps m9, m9, m11
+addpsm10, m10, m12
 %endif
 subj, 64 * 4
 %endmacro
@@ -269,17 +273,21 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
   synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
-%if cpuflag(sse2)
+%if cpuflag(sse2)  notcpuflag(avx)
 movd   scale, scalem
+SPLATDm0
 %else
-movss  scale, scalem
+VBROADCASTSS  m0, scalem
 %endif
 ; Make sure offset is in a register and not on the stack
 %define OFFQ  r4q
 %else
+SPLATD  xmm0
+%if cpuflag(avx)
+vinsertf128   m0, m0, xmm0, 1
+%endif
 %define OFFQ  offq
 %endif
-SPLATDm0
 ; prepare inner counter limit 1
 mov  r5q, 480
 sub  r5q, offmp
@@ -346,11 +354,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
 %endif
 ;~ out[i]  = a * scale;
 ;~ out[i + 16] = b * scale;
-mulps m1, scale
-mulps m2, scale
+mulps m1, m1, scale
+mulps m2, m2, scale
 %if ARCH_X86_64
-mulps m7, scale
-mulps m8, scale
+mulps m7, m7, scale
+mulps m8, m8, scale
 %endif
 ;~ synth_buf2[i]  = c;
 ;~ synth_buf2[i + 16] = d;
@@ -379,3 +387,7 @@ SYNTH_FILTER
 %endif
 INIT_XMM sse2
 SYNTH_FILTER
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+SYNTH_FILTER
+%endif
diff --git a/libavcodec/x86

[libav-devel] [PATCH 3/3] x86/synth_filter: add synth_filter_fma3

2014-03-03 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 28 +++-
 libavcodec/x86/dcadsp_init.c |  4 
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 0d7c86e..e1842ef 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -230,16 +230,12 @@ DCA_LFE_FIR 1
 SHUF m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
 mova m12, [ptr1 + j + mmsize]
 %endif
-mulps m6, m6,  [win + %1 + j + 16 * 4]
+FMULADD_PSm2, m6,  [win + %1 + j + 16 * 4], m2, m6
 mulps m5, m5,  [win + %1 + j]
-%if ARCH_X86_64
-mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4]
-mulpsm11, m11, [win + %1 + j + mmsize]
-%endif
-addps m2, m2, m6
 subps m1, m1, m5
 %if ARCH_X86_64
-addps m8, m8, m12
+FMULADD_PSm8, m12, [win + %1 + j + mmsize + 16 * 4], m8, m12
+mulpsm11, m11, [win + %1 + j + mmsize]
 subps m7, m7, m11
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
@@ -250,17 +246,11 @@ DCA_LFE_FIR 1
 SHUF m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
-mulps m5, m5,  [win + %1 + j + 32 * 4]
-mulps m6, m6,  [win + %1 + j + 48 * 4]
+FMULADD_PSm3, m5,  [win + %1 + j + 32 * 4], m3, m5
+FMULADD_PSm4, m6,  [win + %1 + j + 48 * 4], m4, m6
 %if ARCH_X86_64
-mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4]
-mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4]
-%endif
-addps m3, m3, m5
-addps m4, m4, m6
-%if ARCH_X86_64
-addps m9, m9, m11
-addpsm10, m10, m12
+FMULADD_PSm9, m11, [win + %1 + j + mmsize + 32 * 4], m9, m11
+FMULADD_PS   m10, m12, [win + %1 + j + mmsize + 48 * 4], m10, m12
 %endif
 subj, 64 * 4
 %endmacro
@@ -391,3 +381,7 @@ SYNTH_FILTER
 INIT_YMM avx
 SYNTH_FILTER
 %endif
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+SYNTH_FILTER
+%endif
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index ab20635..132f75e 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -80,6 +80,7 @@ SYNTH_FILTER_FUNC(sse)
 #endif
 SYNTH_FILTER_FUNC(sse2)
 SYNTH_FILTER_FUNC(avx)
+SYNTH_FILTER_FUNC(fma3)
 
 av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 {
@@ -96,4 +97,7 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 if (EXTERNAL_AVX(cpu_flags)) {
 s-synth_filter_float = synth_filter_avx;
 }
+if (EXTERNAL_FMA3(cpu_flags)) {
+s-synth_filter_float = synth_filter_fma3;
+}
 }
-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] x86: dcadsp: Fix linking with yasm and optimizations disabled

2014-03-04 Thread James Almer

On 04/03/14 3:48 PM, Diego Biurrun wrote:
 Some optimized functions reference optimized symbols, so the functions
 must be explicitly disabled when those symbols are unavailable.
 ---
  libavcodec/x86/dcadsp_init.c |4 
  1 file changed, 4 insertions(+)
 
 diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
 index 3821892..0b9428a 100644
 --- a/libavcodec/x86/dcadsp_init.c
 +++ b/libavcodec/x86/dcadsp_init.c
 @@ -60,6 +60,7 @@ void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float 
 synth_buf2[32],
  const float window[512],
  float out[32], intptr_t offset, float scale);
  
 +#if HAVE_SSE2_EXTERNAL
  static void synth_filter_sse2(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],
 @@ -74,12 +75,15 @@ static void synth_filter_sse2(FFTContext *imdct,
  
  *synth_buf_offset = (*synth_buf_offset - 32)  511;
  }
 +#endif /* HAVE_SSE2_EXTERNAL */
  
  av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
  {
 +#if HAVE_SSE2_EXTERNAL
  int cpu_flags = av_get_cpu_flags();
  
  if (EXTERNAL_SSE2(cpu_flags)) {
  s-synth_filter_float = synth_filter_sse2;
  }
 +#endif /* HAVE_SSE2_EXTERNAL */
  }

Most files use HAVE_YASM for this. It's more correct and allows the addition 
of other asm functions that don't depend on HAVE_SSE2_EXTERNAL.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] libx265: More API fixes

2014-03-06 Thread James Almer

On 06/03/14 5:47 PM, Luca Barbato wrote:
 On 06/03/14 21:34, Reinhard Tartler wrote:
 Do we want this in release/10?
 
 Yes.

The current stable version (x265 0.8) has X265_BUILD == 7. This change 
would make libav 10 only support the development branch, and most users 
and even distros usually prefer compiling using stable versions of 
every library.

Wouldn't it be better to support both? A simple preprocessor directive 
would be enough.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] avformat: more correct printf format specifiers

2014-03-10 Thread James Almer

On 10/03/14 11:37 AM, Diego Biurrun wrote:
 ---
  libavformat/apetag.c |6 --
  libavformat/asfdec.c |8 +---
  libavformat/avidec.c |4 ++--
  libavformat/bink.c   |   10 +++---
  libavformat/cafdec.c |5 -
  libavformat/crcenc.c |4 +++-
  libavformat/dfa.c|7 +--
  libavformat/dxa.c|5 -
  libavformat/electronicarts.c |8 +---
  libavformat/framecrcenc.c|4 +++-
  libavformat/gxf.c|6 +-
  libavformat/hnm.c|   11 +++
  libavformat/iff.c|4 +++-
  libavformat/lxfdec.c |9 ++---
  libavformat/matroskadec.c|3 ++-
  libavformat/mov.c|7 ---
  libavformat/mvi.c|5 -
  libavformat/mxfdec.c |   13 -
  libavformat/omadec.c |8 +---
  libavformat/rmdec.c  |4 +++-
  libavformat/rpl.c|4 ++--
  libavformat/smacker.c|8 ++--
  libavformat/smjpegdec.c  |8 +---
  libavformat/spdifenc.c   |8 +---
  libavformat/wtv.c|6 --
  libavformat/xmv.c|6 +++---
  26 files changed, 114 insertions(+), 57 deletions(-)

[...]

 @@ -539,14 +539,15 @@ static int mxf_read_partition_pack(void *arg, 
 AVIOContext *pb, int tag, int size
  }
  
  if (partition-kag_size = 0 || partition-kag_size  (1  20)) {
 -av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %i - guessing , 
 partition-kag_size);
 +av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %PRId32 - 
 guessing ,
 +   partition-kag_size);

PRIi32? Ditto for any similar case.

[...]

 @@ -501,7 +503,7 @@ static void get_tag(AVFormatContext *s, AVIOContext *pb, 
 const char *key, int ty
  return;
  
  if (type == 0  length == 4) {
 -snprintf(buf, buf_size, %PRIi32, avio_rl32(pb));
 +snprintf(buf, buf_size, %u, avio_rl32(pb));

Isn't this doing the opposite of what the patch was meant to do?
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] avformat: more correct printf format specifiers

2014-03-10 Thread James Almer

On 10/03/14 11:55 PM, Tim Walker wrote:
 On 11 Mar 2014, at 03:38, James Almer jamr...@gmail.com wrote:
 
 On 10/03/14 11:37 AM, Diego Biurrun wrote:
 ---
 libavformat/apetag.c |6 --
 libavformat/asfdec.c |8 +---
 libavformat/avidec.c |4 ++--
 libavformat/bink.c   |   10 +++---
 libavformat/cafdec.c |5 -
 libavformat/crcenc.c |4 +++-
 libavformat/dfa.c|7 +--
 libavformat/dxa.c|5 -
 libavformat/electronicarts.c |8 +---
 libavformat/framecrcenc.c|4 +++-
 libavformat/gxf.c|6 +-
 libavformat/hnm.c|   11 +++
 libavformat/iff.c|4 +++-
 libavformat/lxfdec.c |9 ++---
 libavformat/matroskadec.c|3 ++-
 libavformat/mov.c|7 ---
 libavformat/mvi.c|5 -
 libavformat/mxfdec.c |   13 -
 libavformat/omadec.c |8 +---
 libavformat/rmdec.c  |4 +++-
 libavformat/rpl.c|4 ++--
 libavformat/smacker.c|8 ++--
 libavformat/smjpegdec.c  |8 +---
 libavformat/spdifenc.c   |8 +---
 libavformat/wtv.c|6 --
 libavformat/xmv.c|6 +++---
 26 files changed, 114 insertions(+), 57 deletions(-)

 [...]

 @@ -539,14 +539,15 @@ static int mxf_read_partition_pack(void *arg, 
 AVIOContext *pb, int tag, int size
 }

 if (partition-kag_size = 0 || partition-kag_size  (1  20)) {
 -av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %i - guessing , 
 partition-kag_size);
 +av_log(mxf-fc, AV_LOG_WARNING, invalid KAGSize %PRId32 - 
 guessing ,
 +   partition-kag_size);

 PRIi32? Ditto for any similar case.
 
 Same result, but %d/%PRId32 are more commonly used than %i/%PRIi32
 

Fair enough. It was mostly a nit to keep the same specifier after expansion.


 [...]

 @@ -501,7 +503,7 @@ static void get_tag(AVFormatContext *s, AVIOContext 
 *pb, const char *key, int ty
 return;

 if (type == 0  length == 4) {
 -snprintf(buf, buf_size, %PRIi32, avio_rl32(pb));
 +snprintf(buf, buf_size, %u, avio_rl32(pb));

 Isn't this doing the opposite of what the patch was meant to do?
 
 No, avio_rl32 returns unsigned int, not uint32_t, so %u is the correct 
 specifier.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/4] lavc: Add private API to manipulate AVPacketList

2014-03-13 Thread James Almer

On 13/08/13 11:49 PM, Luca Barbato wrote:
 ---
  libavcodec/avcodec.h   |  5 +
  libavcodec/avpacket.c  | 56 
 ++
  libavcodec/internal.h  | 36 
  libavformat/avformat.h |  6 --
  4 files changed, 97 insertions(+), 6 deletions(-)

What's the status on this? I don't remember it ever being dropped and it 
certainly
wasn't pushed.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/3] x86/synth_filter: add synth_filter_sse

2014-03-15 Thread James Almer

On 14/03/14 7:56 AM, Christophe Gisquet wrote:
 Hi,
 
 2014-03-04 3:25 GMT+01:00 James Almer jamr...@gmail.com:
 -INIT_XMM sse2
 +%macro SETZERO 1
 +%if cpuflag(sse2)
 +pxor  %1, %1
 +%else
 +xorps %1, %1, %1
 +%endif
 +%endmacro
 +
 +%macro SHUF 2
 +%if cpuflag(sse2)
 +pshufd%1, %2, q0123
 +%else
 +mova  %1, %2
 +shufps%1, %1, q0123
 +%endif
 +%endmacro
 
 We already discussed this, and indeed it is worth having SSE2
 (integer) instructions instead of pure (float) SSE ones for the SSE2
 version as they are actually faster. OK from me then for the asm.
 
 Not sure if the C part still applies cleanly, but this should be minor.

It doesn't. I'll rebase and send the patchset again with some other changes 
later.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 3/3] x86/synth_filter: add synth_filter_fma3

2014-03-15 Thread James Almer

On 14/03/14 8:02 AM, Christophe Gisquet wrote:
 Hi,
 
 2014-03-04 3:25 GMT+01:00 James Almer jamr...@gmail.com:
 snip
 Don't know fma3 but this is straightforward replacement of mul+add by
 a mac instruction. If the avx code is ok, I don't see how this
 wouldn't.

I just noticed I can replace the mul+sub instructions as well with a 
single fnmaddps, so I'll send an updated version with that change.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 5/6] truehd: break out part of output_data into platform-specific callback.

2014-03-19 Thread James Almer

On 19/03/14 2:24 PM, Ben Avison wrote:
 diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
 index bd864d9..7b7640e 100644
 --- a/libavcodec/mlpdsp.h
 +++ b/libavcodec/mlpdsp.h
 @@ -23,6 +23,7 @@
  #define AVCODEC_MLPDSP_H
  
  #include stdint.h
 +#include mlp.h
  
  void ff_mlp_rematrix_channel(int32_t *samples,
   const int32_t *coeffs,
 @@ -36,6 +37,15 @@ void ff_mlp_rematrix_channel(int32_t *samples,
   int access_unit_size_pow2,
   int32_t mask);
  
 +int32_t ff_mlp_pack_output(int32_t lossless_check_data,
 +   int32_t (*sample_buffer)[MAX_CHANNELS],
 +   void *data,
 +   uint16_t blockpos,
 +   uint8_t max_matrix_channel,
 +   int is32,
 +   uint8_t *ch_assign,
 +   int8_t *output_shift);
 +
  typedef struct MLPDSPContext {
  void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
 int firorder, int iirorder,
 @@ -52,6 +62,18 @@ typedef struct MLPDSPContext {
   int matrix_noise_shift,
   int access_unit_size_pow2,
   int32_t mask);
 +int32_t (*(*mlp_select_pack_output)(uint8_t max_matrix_channel,
 +int is32,
 +uint8_t *ch_assign,
 +int8_t *output_shift))(int32_t, 
 int32_t (*)[], void *, uint16_t, uint8_t, int, uint8_t*, int8_t *);
 +int32_t (*mlp_pack_output)(int32_t lossless_check_data,
 +   int32_t (*sample_buffer)[MAX_CHANNELS],
 +   void *data,
 +   uint16_t blockpos,
 +   uint8_t max_matrix_channel,
 +   int is32,
 +   uint8_t *ch_assign,
 +   int8_t *output_shift);
  } MLPDSPContext;
  
  void ff_mlpdsp_init(MLPDSPContext *c);
 

Please put pointers first if possible, like you did for mlp_rematrix_channel.
Something like

+int32_t (*mlp_pack_output)(int32_t (*sample_buffer)[MAX_CHANNELS],
+   void *data,
+   uint8_t *ch_assign,
+   int8_t *output_shift,
+   int32_t lossless_check_data,
+   uint16_t blockpos,
+   uint8_t max_matrix_channel,
+   int is32);
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 3/3 v2] x86/synth_filter: add synth_filter_fma3

2014-03-20 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 22 ++
 libavcodec/x86/dcadsp_init.c |  6 ++
 2 files changed, 28 insertions(+)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 662cb96..59d96bf 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -230,6 +230,14 @@ DCA_LFE_FIR 1
 SHUF m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
 mova m12, [ptr1 + j + mmsize]
 %endif
+%if cpuflag(fma3)
+fmaddps   m2, m6,  [win + %1 + j + 16 * 4], m2
+fnmaddps  m1, m5,  [win + %1 + j], m1
+%if ARCH_X86_64
+fmaddps   m8, m12, [win + %1 + j + mmsize + 16 * 4], m8
+fnmaddps  m7, m11, [win + %1 + j + mmsize], m7
+%endif
+%else ; non-FMA
 mulps m6, m6,  [win + %1 + j + 16 * 4]
 mulps m5, m5,  [win + %1 + j]
 %if ARCH_X86_64
@@ -242,6 +250,7 @@ DCA_LFE_FIR 1
 addps m8, m8, m12
 subps m7, m7, m11
 %endif
+%endif ; cpuflag(fma3)
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
 ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
 SHUF  m6,  ptr2 + j + (31 - 3) * 4, m5
@@ -250,6 +259,14 @@ DCA_LFE_FIR 1
 SHUF m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
+%if cpuflag(fma3)
+fmaddps   m3, m5,  [win + %1 + j + 32 * 4], m3
+fmaddps   m4, m6,  [win + %1 + j + 48 * 4], m4
+%if ARCH_X86_64
+fmaddps   m9, m11, [win + %1 + j + mmsize + 32 * 4], m9
+fmaddps  m10, m12, [win + %1 + j + mmsize + 48 * 4], m10
+%endif
+%else ; non-FMA
 mulps m5, m5,  [win + %1 + j + 32 * 4]
 mulps m6, m6,  [win + %1 + j + 48 * 4]
 %if ARCH_X86_64
@@ -262,6 +279,7 @@ DCA_LFE_FIR 1
 addps m9, m9, m11
 addpsm10, m10, m12
 %endif
+%endif ; cpuflag(fma3)
 subj, 64 * 4
 %endmacro
 
@@ -400,3 +418,7 @@ INIT_XMM sse2
 SYNTH_FILTER
 INIT_YMM avx
 SYNTH_FILTER
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+SYNTH_FILTER
+%endif
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index d7e0d65..beef288 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -82,6 +82,9 @@ SYNTH_FILTER_FUNC(sse)
 #endif
 SYNTH_FILTER_FUNC(sse2)
 SYNTH_FILTER_FUNC(avx)
+#if HAVE_FMA3_EXTERNAL
+SYNTH_FILTER_FUNC(fma3)
+#endif
 #endif /* HAVE_YASM */
 
 av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
@@ -100,5 +103,8 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 if (EXTERNAL_AVX(cpu_flags)) {
 s-synth_filter_float = synth_filter_avx;
 }
+if (EXTERNAL_FMA3(cpu_flags)) {
+s-synth_filter_float = synth_filter_fma3;
+}
 #endif /* HAVE_YASM */
 }
-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/3 v2] x86/synth_filter: add synth_filter_avx

2014-03-20 Thread James Almer

Sandy Bridge Win64:
180 cycles in ff_synth_filter_inner_sse2
150 cycles in ff_synth_filter_inner_avx

Also switch some instructions to a three operand format to avoid
assembly errors with Yasm 1.1.0 or older.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 85 +++-
 libavcodec/x86/dcadsp_init.c |  4 +++
 2 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 970ec3d..662cb96 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -200,18 +200,22 @@ DCA_LFE_FIR 0
 DCA_LFE_FIR 1
 
 %macro SETZERO 1
-%if cpuflag(sse2)
+%if cpuflag(sse2)  notcpuflag(avx)
 pxor  %1, %1
 %else
 xorps %1, %1, %1
 %endif
 %endmacro
 
-%macro SHUF 2
-%if cpuflag(sse2)
-pshufd%1, %2, q0123
+%macro SHUF 3
+%if cpuflag(avx)
+mova  %3, [%2 - 16]
+vperm2f128%1, %3, %3, 1
+vshufps   %1, %1, %1, q0123
+%elif cpuflag(sse2)
+pshufd%1, [%2], q0123
 %else
-mova  %1, %2
+mova  %1, [%2]
 shufps%1, %1, q0123
 %endif
 %endmacro
@@ -220,43 +224,43 @@ DCA_LFE_FIR 1
 ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
 ;~ a += window[i + j]  * (-synth_buf[15 - i + j])
 ;~ b += window[i + j + 16] * (synth_buf[i + j])
-SHUF  m5, [ptr2 + j + (15 - 3) * 4]
+SHUF  m5,  ptr2 + j + (15 - 3) * 4, m6
 mova  m6, [ptr1 + j]
 %if ARCH_X86_64
-SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize]
+SHUF m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
 mova m12, [ptr1 + j + mmsize]
 %endif
-mulps m6, [win  + %1 + j + 16 * 4]
-mulps m5, [win  + %1 + j]
+mulps m6, m6,  [win + %1 + j + 16 * 4]
+mulps m5, m5,  [win + %1 + j]
 %if ARCH_X86_64
-mulpsm12, [win  + %1 + j + mmsize + 16 * 4]
-mulpsm11, [win  + %1 + j + mmsize]
+mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4]
+mulpsm11, m11, [win + %1 + j + mmsize]
 %endif
-addps m2, m6
-subps m1, m5
+addps m2, m2, m6
+subps m1, m1, m5
 %if ARCH_X86_64
-addps m8, m12
-subps m7, m11
+addps m8, m8, m12
+subps m7, m7, m11
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
 ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-SHUF  m6, [ptr2 + j + (31 - 3) * 4]
+SHUF  m6,  ptr2 + j + (31 - 3) * 4, m5
 mova  m5, [ptr1 + j + 16 * 4]
 %if ARCH_X86_64
-SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize]
+SHUF m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
-mulps m5, [win  + %1 + j + 32 * 4]
-mulps m6, [win  + %1 + j + 48 * 4]
+mulps m5, m5,  [win + %1 + j + 32 * 4]
+mulps m6, m6,  [win + %1 + j + 48 * 4]
 %if ARCH_X86_64
-mulpsm11, [win  + %1 + j + mmsize + 32 * 4]
-mulpsm12, [win  + %1 + j + mmsize + 48 * 4]
+mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4]
+mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4]
 %endif
-addps m3, m5
-addps m4, m6
+addps m3, m3, m5
+addps m4, m4, m6
 %if ARCH_X86_64
-addps m9, m11
-addpsm10, m12
+addps m9, m9, m11
+addpsm10, m10, m12
 %endif
 subj, 64 * 4
 %endmacro
@@ -269,25 +273,34 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
   synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
-%if cpuflag(sse2)
+%if cpuflag(sse2)  notcpuflag(avx)
 movd   scale, scalem
+SPLATDm0
 %else
-movss  scale, scalem
+VBROADCASTSS  m0, scalem
 %endif
 ; Make sure offset is in a register and not on the stack
 %define OFFQ  r4q
 %else
+SPLATD  xmm0
+%if cpuflag(avx)
+vinsertf128   m0, m0, xmm0, 1
+%endif
 %define OFFQ  offq
 %endif
-SPLATDm0
 ; prepare inner counter limit 1
 mov  r5q, 480
 sub  r5q, offmp
 and  r5q, -64
 shl  r5q, 2
+%if ARCH_X86_32 || notcpuflag(avx)
 mov OFFQ, r5q
 %define ir5q
 movi, 16 * 4 - (ARCH_X86_64 + 1) * mmsize  ; main loop counter
+%else
+%define i 0
+%define OFFQ  r5q
+%endif
 
 %define buf2 synth_buf2q
 %if ARCH_X86_32
@@ -306,8 +319,10 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
 %define jr3q
 mov  win, windowm
 mov ptr1, synth_bufm
+%if ARCH_X86_32 || notcpuflag(avx)
 add  win, i
 add ptr1, i
+%endif
 %else ; ARCH_X86_64
 %define ptr1 r6q
 %define ptr2 r7q ; must be loaded

[libav-devel] [PATCH 1/3 v2] x86/synth_filter: add synth_filter_sse

2014-03-20 Thread James Almer

Build only on x86_32 targets.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 55 +---
 libavcodec/x86/dcadsp_init.c | 45 ++--
 2 files changed, 70 insertions(+), 30 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 56039ba..970ec3d 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -199,15 +199,31 @@ INIT_XMM sse
 DCA_LFE_FIR 0
 DCA_LFE_FIR 1
 
-INIT_XMM sse2
+%macro SETZERO 1
+%if cpuflag(sse2)
+pxor  %1, %1
+%else
+xorps %1, %1, %1
+%endif
+%endmacro
+
+%macro SHUF 2
+%if cpuflag(sse2)
+pshufd%1, %2, q0123
+%else
+mova  %1, %2
+shufps%1, %1, q0123
+%endif
+%endmacro
+
 %macro INNER_LOOP   1
 ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
 ;~ a += window[i + j]  * (-synth_buf[15 - i + j])
 ;~ b += window[i + j + 16] * (synth_buf[i + j])
-pshufdm5, [ptr2 + j + (15 - 3) * 4], q0123
+SHUF  m5, [ptr2 + j + (15 - 3) * 4]
 mova  m6, [ptr1 + j]
 %if ARCH_X86_64
-pshufd   m11, [ptr2 + j + (15 - 3) * 4 - mmsize], q0123
+SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize]
 mova m12, [ptr1 + j + mmsize]
 %endif
 mulps m6, [win  + %1 + j + 16 * 4]
@@ -224,10 +240,10 @@ INIT_XMM sse2
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
 ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-pshufdm6, [ptr2 + j + (31 - 3) * 4], q0123
+SHUF  m6, [ptr2 + j + (31 - 3) * 4]
 mova  m5, [ptr1 + j + 16 * 4]
 %if ARCH_X86_64
-pshufd   m12, [ptr2 + j + (31 - 3) * 4 - mmsize], q0123
+SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize]
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
 mulps m5, [win  + %1 + j + 32 * 4]
@@ -245,20 +261,25 @@ INIT_XMM sse2
 subj, 64 * 4
 %endmacro
 
-; void ff_synth_filter_inner_sse2(float *synth_buf, float synth_buf2[32],
-; const float window[512], float out[32],
-; intptr_t offset, float scale)
+; void ff_synth_filter_inner_opt(float *synth_buf, float synth_buf2[32],
+;  const float window[512], float out[32],
+;  intptr_t offset, float scale)
+%macro SYNTH_FILTER 0
 cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
   synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
+%if cpuflag(sse2)
 movd   scale, scalem
+%else
+movss  scale, scalem
+%endif
 ; Make sure offset is in a register and not on the stack
 %define OFFQ  r4q
 %else
 %define OFFQ  offq
 %endif
-pshufdm0, m0, 0
+SPLATDm0
 ; prepare inner counter limit 1
 mov  r5q, 480
 sub  r5q, offmp
@@ -274,8 +295,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * 
ARCH_X86_64, \
 %endif
 .mainloop
 ; m1 = a  m2 = b  m3 = c  m4 = d
-pxor  m3, m3
-pxor  m4, m4
+SETZERO   m3
+SETZERO   m4
 mova  m1, [buf2 + i]
 mova  m2, [buf2 + i + 16 * 4]
 %if ARCH_X86_32
@@ -292,8 +313,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * 
ARCH_X86_64, \
 %define ptr2 r7q ; must be loaded
 %define win  r8q
 %define jr9q
-pxor  m9, m9
-pxor m10, m10
+SETZERO   m9
+SETZERO  m10
 mova  m7, [buf2 + i + mmsize]
 mova  m8, [buf2 + i + mmsize + 16 * 4]
 lea  win, [windowq + i]
@@ -350,3 +371,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
 subi, (ARCH_X86_64 + 1) * mmsize
 jge.mainloop
 RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_XMM sse
+SYNTH_FILTER
+%endif
+INIT_XMM sse2
+SYNTH_FILTER
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 65e3db5..5b77985 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -56,25 +56,31 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 }
 }
 
-void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32],
-const float window[512],
-float out[32], intptr_t offset, float scale);
 
-#if HAVE_YASM
-static void synth_filter_sse2(FFTContext *imdct,
-  float *synth_buf_ptr, int *synth_buf_offset,
-  float synth_buf2[32], const float window[512],
-  float out[32], const float in[32], float scale)
-{
-float *synth_buf= synth_buf_ptr + *synth_buf_offset;
-
-imdct-imdct_half(imdct, synth_buf, in);
+#define SYNTH_FILTER_FUNC(opt

[libav-devel] [PATCH 0/3 v2] synth filter float ASM

2014-03-20 Thread James Almer

Here are some extra implementations that extend Christophe's work.

Differences with v1:

* AVX/FMA3: Removed the main loop and related bookkeeping for x64 since said
loop would be run only once anyway.
* FMA3: Replaced mulps+subps with FMA3 instructions, meaning two fewer
instructions run per loop in that version.
* Removed some unnecessary preprocessor guards and added some missing ones.

Knowing that AMD currently has lackluster performance with ymm registers, I
could add an FMA4 version of this function using xmm registers, which would
benefit said processors unlike the AVX/FMA3 ymm ones. Thoughts?

James Almer (3):
  x86/synth_filter: add synth_filter_sse
  x86/synth_filter: add synth_filter_avx
  x86/synth_filter: add synth_filter_fma3

 libavcodec/x86/dcadsp.asm| 138 ---
 libavcodec/x86/dcadsp_init.c |  55 +++--
 2 files changed, 143 insertions(+), 50 deletions(-)

-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 3/3 v2] x86/synth_filter: add synth_filter_fma3

2014-03-24 Thread James Almer

On 24/03/14 11:28 AM, Diego Biurrun wrote:
 On Thu, Mar 20, 2014 at 03:37:56PM -0300, James Almer wrote:
 --- a/libavcodec/x86/dcadsp.asm
 +++ b/libavcodec/x86/dcadsp.asm
 @@ -230,6 +230,14 @@ DCA_LFE_FIR 1
  mova m12, [ptr1 + j + mmsize]
  %endif
 +%if cpuflag(fma3)
 +fmaddps   m2, m6,  [win + %1 + j + 16 * 4], m2
 +fnmaddps  m1, m5,  [win + %1 + j], m1
 +%if ARCH_X86_64
 +fmaddps   m8, m12, [win + %1 + j + mmsize + 16 * 4], m8
 +fnmaddps  m7, m11, [win + %1 + j + mmsize], m7
 +%endif
 +%else ; non-FMA
 
 Doesn't FMA3 imply x86_64?
 
 Diego

No, no simd extension so far implies x86_64. Not even the upcoming AVX512.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 3/3 v2] x86/synth_filter: add synth_filter_fma3

2014-03-24 Thread James Almer

On 24/03/14 3:07 PM, Diego Biurrun wrote:
 On Mon, Mar 24, 2014 at 02:59:08PM -0300, James Almer wrote:
 On 24/03/14 11:28 AM, Diego Biurrun wrote:
 On Thu, Mar 20, 2014 at 03:37:56PM -0300, James Almer wrote:
 --- a/libavcodec/x86/dcadsp.asm
 +++ b/libavcodec/x86/dcadsp.asm
 @@ -230,6 +230,14 @@ DCA_LFE_FIR 1
  mova m12, [ptr1 + j + mmsize]
  %endif
 +%if cpuflag(fma3)
 +fmaddps   m2, m6,  [win + %1 + j + 16 * 4], m2
 +fnmaddps  m1, m5,  [win + %1 + j], m1
 +%if ARCH_X86_64
 +fmaddps   m8, m12, [win + %1 + j + mmsize + 16 * 4], m8
 +fnmaddps  m7, m11, [win + %1 + j + mmsize], m7
 +%endif
 +%else ; non-FMA

 Doesn't FMA3 imply x86_64?

 No, no simd extension so far implies x86_64. Not even the upcoming AVX512.
 
 But which modern x86 SIMD extensions are available on x86_32?
 
 Diego

All of them so far work on both x86_32 and x86_64, with the usual limitations 
for the former (8 general purpose registers and 8 simd registers).
As I said, even AVX512, which hasn't been realized in hardware yet, will also 
be available for x86_32.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/3 v2] x86/synth_filter: add synth_filter_sse

2014-03-28 Thread James Almer

On 28/03/14 4:15 PM, Jason Garrett-Glaser wrote:
 On Thu, Mar 20, 2014 at 11:37 AM, James Almer jamr...@gmail.com wrote:
 Build only on x86_32 targets.

 Signed-off-by: James Almer jamr...@gmail.com
 ---
  libavcodec/x86/dcadsp.asm| 55 
 +---
  libavcodec/x86/dcadsp_init.c | 45 ++--
  2 files changed, 70 insertions(+), 30 deletions(-)

 diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
 index 56039ba..970ec3d 100644
 --- a/libavcodec/x86/dcadsp.asm
 +++ b/libavcodec/x86/dcadsp.asm
 @@ -199,15 +199,31 @@ INIT_XMM sse
  DCA_LFE_FIR 0
  DCA_LFE_FIR 1

 -INIT_XMM sse2
 +%macro SETZERO 1
 +%if cpuflag(sse2)
 +pxor  %1, %1
 +%else
 +xorps %1, %1, %1
 +%endif
 +%endmacro
 
 Is there some reason we can't just use xorps here for all versions?  I
 mean, it is float data, right?
 
  %if ARCH_X86_32 || WIN64
 +%if cpuflag(sse2)
  movd   scale, scalem
 +%else
 +movss  scale, scalem
 +%endif
 
 Same here; does this need to be ifdeffed?
 
 Otherwise looks okay.
 
 Jason

You're right that it's all float data, but both Christophe and I tested and 
xorps/shufps was a bit slower than pxor/pshufd (at least in my tests it was 
about five cycles slower), so I decided to use some ifdeffery to keep the 
SSE2 version intact.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/3 v2] x86/synth_filter: add synth_filter_avx

2014-03-28 Thread James Almer

On 28/03/14 4:16 PM, Jason Garrett-Glaser wrote:
 On Thu, Mar 20, 2014 at 11:37 AM, James Almer jamr...@gmail.com wrote:
 Sandy Bridge Win64:
 180 cycles in ff_synth_filter_inner_sse2
 150 cycles in ff_synth_filter_inner_avx

 Also switch some instructions to a three operand format to avoid
 assembly errors with Yasm 1.1.0 or older.
 
 If this is an issue, could we possibly resolve it in x86inc.asm
 instead of uglifying the asm?
 
 Jason

Pretty much every AVX function in the tree is using the three-operand format
to work around this problem, so it will certainly be welcomed if someone fixes
it.

And I don't think it uglifies the asm that much. Besides, with the addition 
of the FMA3 version it ends up having a nice vertical alignment.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] Add gen-rc tool for generating Windows resource files

2014-03-31 Thread James Almer

On 30/03/14 10:49 AM, Vittorio Giovara wrote:
 From: Timothy Gu timothyg...@gmail.com
 
 ---
 Anyone willing to test this on Windows would be welcome.
 Vittorio
 
  tools/gen-rc | 122 
 +++
  1 file changed, 122 insertions(+)
  create mode 100755 tools/gen-rc
 
 diff --git a/tools/gen-rc b/tools/gen-rc
 new file mode 100755
 index 000..269f2f6
 --- /dev/null
 +++ b/tools/gen-rc
 @@ -0,0 +1,122 @@
 +#!/bin/sh
 +#
 +# Copyright (c) 2012 James Almer
 +# Copyright (c) 2013 Tiancheng Timothy Gu
 +#
 +# This file is part of Libav.
 +#
 +# Libav is free software; you can redistribute it and/or
 +# modify it under the terms of the GNU Lesser General Public
 +# License as published by the Free Software Foundation; either
 +# version 2.1 of the License, or (at your option) any later version.
 +#
 +# Libav is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 +# See the GNU Lesser General Public License for more details.
 +#
 +# You should have received a copy of the GNU Lesser General Public License
 +# along with Libav; if not, write to the Free Software Foundation, Inc.,
 +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 +
 +## Help
 +die() {
 +cat EOF 2
 +This script is used to generate Windows resources file for the Libav 
 libraries.
 +The output .rc file is to be compiled by windres(1). It is mainly useful for
 +Libav developers to tweak and regenerate all resources files at once.
 +
 +Usage: $0 libname comment
 +
 +The script will output the file to 'libname/libname-without-libres.rc'.
 +
 +Example: $0 libavcodec 'Libav codecs library'
 +EOF
 +exit 1
 +}
 +
 +# Script to generate all:
 +# (to remove prefix '# ' and add 'tools/' as prefix: sed -r 
 's/^.{2}/tools\//')
 +# gen-rc libavutil Libav utility library
 +# gen-rc libavcodecLibav codec library
 +# gen-rc libavformat   Libav container format library
 +# gen-rc libavdevice   Libav device handling library
 +# gen-rc libavfilter   Libav audio/video filtering library
 +# gen-rc libavresample Libav audio resampling library
 +# gen-rc libswscaleLibav image rescaling library
 +
 +## Sanity checks and argument parsing
 +if test $# -lt 2 || test $# -gt 3; then
 +die
 +fi
 +
 +name=$1
 +shortname=${name#lib}
 +comment=$2
 +capname=`echo $name | awk '{print toupper($0)}'`
 +version=${capname}_VERSION
 +
 +mkdir -p $name
 +output=$name/${shortname}res.rc
 +
 +## REAL magic
 +cat EOF  $output
 +/*
 + * Windows resource file for $name
 + *
 + * Copyright (C) 2012 James Almer
 + * Copyright (C) 2013 Tiancheng Timothy Gu
 + *
 + * This file is part of Libav.
 + *
 + * Libav is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * Libav is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with Libav; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
 USA
 + */
 +
 +#include config.h
 +
 +#include windows.h
 +
 +#include libavutil/version.h
 +
 +#include $name/version.h
 +
 +1 VERSIONINFO
 +FILEVERSION ${version}_MAJOR, ${version}_MINOR, ${version}_MICRO, 0
 +PRODUCTVERSION  ${version}_MAJOR, ${version}_MINOR, ${version}_MICRO, 0
 +FILEFLAGSMASK   VS_FFI_FILEFLAGSMASK
 +FILEOS  VOS_NT_WINDOWS32
 +FILETYPEVFT_DLL
 +{
 +BLOCK StringFileInfo
 +{
 +BLOCK 040904B0
 +{
 +VALUE CompanyName,  Libav Project
 +VALUE FileDescription,  $comment
 +VALUE FileVersion,  AV_STRINGIFY($version)
 +VALUE InternalName, $name
 +VALUE LegalCopyright,   Copyright (C) 2000- 
 AV_STRINGIFY(CONFIG_THIS_YEAR)  Libav Project

This will fail because the commit adding CONFIG_THIS_YEAR is not in the tree.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] x86/synth_filter: remove the fma3 version ifdefs

2014-04-05 Thread James Almer

This fixes compilation failures with --disable-fma3

Signed-off-by: James Almer jamr...@gmail.com
---
See https://fate.libav.org/x86_32-linux-suncc-nosse/20140405142549

 libavcodec/x86/dcadsp.asm| 2 --
 libavcodec/x86/dcadsp_init.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 59d96bf..c42ee23 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -418,7 +418,5 @@ INIT_XMM sse2
 SYNTH_FILTER
 INIT_YMM avx
 SYNTH_FILTER
-%if HAVE_FMA3_EXTERNAL
 INIT_YMM fma3
 SYNTH_FILTER
-%endif
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index beef288..9acb818 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -82,9 +82,7 @@ SYNTH_FILTER_FUNC(sse)
 #endif
 SYNTH_FILTER_FUNC(sse2)
 SYNTH_FILTER_FUNC(avx)
-#if HAVE_FMA3_EXTERNAL
 SYNTH_FILTER_FUNC(fma3)
-#endif
 #endif /* HAVE_YASM */
 
 av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] x86/synth_filter: remove the fma3 version ifdefs

2014-04-05 Thread James Almer

On 05/04/14 5:20 PM, Diego Biurrun wrote:
 On Sat, Apr 05, 2014 at 02:00:53PM -0300, James Almer wrote:
 This fixes compilation failures with --disable-fma3

 Signed-off-by: James Almer jamr...@gmail.com
 ---
 See https://fate.libav.org/x86_32-linux-suncc-nosse/20140405142549

  libavcodec/x86/dcadsp.asm| 2 --
  libavcodec/x86/dcadsp_init.c | 2 --
  2 files changed, 4 deletions(-)
 
 Hmm, I cannot reproduce this with gcc ...
 
 Diego

I can reproduce it with mingw-w64 4.8.2, configuring with --disable-fma3 
then running make libavcodec/x86/dcadsp_init.o.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] matroskaenc: Allow VP9 and Opus in webm

2014-05-31 Thread James Almer

On 31/05/14 4:23 AM, Anton Khirnov wrote:
 From: Tudor Suciu tudor.su...@gmail.com
 
 Signed-off-by: Anton Khirnov an...@khirnov.net
 ---
 The webm official page does not mention that, but Google seems to claim those
 are now offically supported. Nice to see they are keeping all the matroska
 traditions.
 ---

It's official, they just haven't released a stable version of libwebm with the 
added support just yet.
http://git.chromium.org/gitweb/?p=webm/libwebm.git;a=commitdiff;h=5efd6e3c1df766c08294ad19168e71522ee0d808

Not to mention both Gecko and Chromium based OS already can play VP9 webm files.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/6] Add av_dict_version() to make it quick to check if a dictionary has changed.

2014-06-04 Thread James Almer

On 04/06/14 3:33 PM, Andrew Stone wrote:
 By comparing versions of dictionaries, it's possible to detect if metadata
 has changed.
 ---
  libavutil/dict.c |  8 
  libavutil/dict.h | 10 ++
  2 files changed, 18 insertions(+)

Maybe "revision" is a better name to avoid confusion with functions and defines 
that return actual version numbers, like those from the libraries.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 15/15] hevc: Add x86 optimized idct functions

2014-06-25 Thread James Almer

On 24/06/14 11:26 AM, Luca Barbato wrote:
 From: Pierre Edouard Lepere pierre-edouard.lep...@insa-rennes.fr
 
 Signed-off-by: Luca Barbato lu_z...@gentoo.org
 ---
  libavcodec/hevc.c |  19 -
  libavcodec/hevcdsp.c  |   5 ++
  libavcodec/hevcdsp.h  |   4 +-
  libavcodec/hevcdsp_template.c | 176 +++--
  libavcodec/x86/Makefile   |   3 +-
  libavcodec/x86/hevc_idct.asm  | 180 
 ++
  libavcodec/x86/hevcdsp.h  |  18 +
  libavcodec/x86/hevcdsp_init.c |  62 +++
  8 files changed, 349 insertions(+), 118 deletions(-)
  create mode 100644 libavcodec/x86/hevc_idct.asm

[...]

 +%macro DC_ADD_INIT 2
 +add  %1w, ((1  14-8) + 1)
 +sar  %1w, (15-8)
 +movd  m0, %1

movd m0, %1d

NASM x86_64 will complain otherwise.

 +lea   %1, [%2*3]
 +SPLATWm0, m0, 0
 +pxor  m1, m1
 +psubw m1, m0
 +packuswb  m0, m0
 +packuswb  m1, m1
 +%endmacro

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 06/15] hevc: Add SSE4 MC functions

2014-06-25 Thread James Almer

On 24/06/14 11:26 AM, Luca Barbato wrote:
 From: Pierre Edouard Lepere pierre-edouard.lep...@insa-rennes.fr
 
 The functions only support x86_64.
 
 Fixes from Hendrik Leppkes and James Almer
 
 Signed-off-by: Luca Barbato lu_z...@gentoo.org
 ---
  libavcodec/hevcdsp.c  |6 +-
  libavcodec/hevcdsp.h  |3 +
  libavcodec/x86/Makefile   |2 +
  libavcodec/x86/hevc_mc.asm| 1256 
 +
  libavcodec/x86/hevcdsp.h  |  164 ++
  libavcodec/x86/hevcdsp_init.c |  373 
  6 files changed, 1803 insertions(+), 1 deletion(-)
  create mode 100644 libavcodec/x86/hevc_mc.asm
  create mode 100644 libavcodec/x86/hevcdsp.h
  create mode 100644 libavcodec/x86/hevcdsp_init.c
 

Many of these functions are SSSE3 and a couple even SSE2 at most.
It will require some init macros rewriting to change, but leaving things as is
will make Atom, Conroe and Bobcat CPUs miss a considerable performance boost.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 06/15] hevc: Add SSE4 MC functions

2014-06-25 Thread James Almer

On 25/06/14 3:44 PM, Luca Barbato wrote:
 On 25/06/14 20:33, James Almer wrote:
 On 24/06/14 11:26 AM, Luca Barbato wrote:
 From: Pierre Edouard Lepere pierre-edouard.lep...@insa-rennes.fr

 The functions only support x86_64.

 Fixes from Hendrik Leppkes and James Almer

 Signed-off-by: Luca Barbato lu_z...@gentoo.org
 ---
  libavcodec/hevcdsp.c  |6 +-
  libavcodec/hevcdsp.h  |3 +
  libavcodec/x86/Makefile   |2 +
  libavcodec/x86/hevc_mc.asm| 1256 
 +
  libavcodec/x86/hevcdsp.h  |  164 ++
  libavcodec/x86/hevcdsp_init.c |  373 
  6 files changed, 1803 insertions(+), 1 deletion(-)
  create mode 100644 libavcodec/x86/hevc_mc.asm
  create mode 100644 libavcodec/x86/hevcdsp.h
  create mode 100644 libavcodec/x86/hevcdsp_init.c


 Many of these functions are SSSE3 and a couple even SSE2 at most.
 
 Can you guide me in this regard?

The SSE4 functions are those using pextrw (with memory operand) and packusdw.

hevc_put_hevc_bi_w2_{8,10}
hevc_put_hevc_bi_w4_{8,10}
hevc_put_hevc_bi_w6_{8,10}
hevc_put_hevc_bi_w8_{8,10}
hevc_put_hevc_uni_w2_{8,10}
hevc_put_hevc_uni_w4_{8,10}
hevc_put_hevc_uni_w6_{8,10}
hevc_put_hevc_uni_w8_{8,10}
hevc_put_hevc_uni_qpel_v{4,8}_10
hevc_put_hevc_uni_qpel_hv2_{8,10}
hevc_put_hevc_uni_qpel_hv4_{8,10}
hevc_put_hevc_uni_qpel_hv6_{8,10}
hevc_put_hevc_uni_qpel_hv8_{8,10}
hevc_put_hevc_uni_pel_pixels{2,6}_8
hevc_put_hevc_bi_pel_pixels{2,6}_8
hevc_put_hevc_{uni,bi}_epel_h2_8
hevc_put_hevc_{uni,bi}_epel_v2_8
hevc_put_hevc_{uni,bi}_epel_h6_8
hevc_put_hevc_{uni,bi}_epel_v6_8
hevc_put_hevc_{uni,bi}_epel_hv{2,6}_8

I think I'm not missing any.
Both instructions can be emulated using SSE2, so the relevant functions could
be duplicated to create an SSE2/SSSE3 variant, but that's for another
time/patch.

The rest are mostly SSSE3 because of pmaddubsw and pmulhrsw, and a few only 
SSE2.

The qpel and epel tables also need to be renamed to remove the sse4 suffix
(which is unneeded).

 
 It will require some init macros rewriting to change, but leaving things as 
 is 
 will make atom, conroe and bobcat cpus miss a considerable performance boost.
 
 Probably I can do myself but your help would be welcome =)

I neither have the time nor really want to deal with the init macros, but I can
help you with the necessary changes to the asm file if needed.

 lu
 
 ___
 libav-devel mailing list
 libav-devel@libav.org
 https://lists.libav.org/mailman/listinfo/libav-devel
 

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 06/15] hevc: Add SSE4 MC functions

2014-06-26 Thread James Almer

---
This applies cleanly after PATCH 14/15, and of course requires relevant changes
to hevcdsp_init.c.
I think I got every function right, but in any case fixing any of them is a
single-line change.

In the end, out of 190 functions, only 44 were SSE4.

 libavcodec/x86/hevc_mc.asm | 363 +++--
 1 file changed, 281 insertions(+), 82 deletions(-)

diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index dac3295..4696fa8 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -30,8 +30,8 @@ zero:   times 4  dd 0
 one_per_32: times 4  dd 1
 
 SECTION .text
-%macro EPEL_TABLE 4
-hevc_epel_filters_%4_%1 times %2 d%3 -2, 58
+%macro EPEL_TABLE 3
+hevc_epel_filters_%1 times %2 d%3 -2, 58
 times %2 d%3 10, -2
 times %2 d%3 -4, 54
 times %2 d%3 16, -2
@@ -49,11 +49,11 @@ hevc_epel_filters_%4_%1 times %2 d%3 -2, 58
 
 
 
-EPEL_TABLE  8, 8, b, sse4
-EPEL_TABLE 10, 4, w, sse4
+EPEL_TABLE  8, 8, b
+EPEL_TABLE 10, 4, w
 
-%macro QPEL_TABLE 4
-hevc_qpel_filters_%4_%1 times %2 d%3  -1,  4
+%macro QPEL_TABLE 3
+hevc_qpel_filters_%1 times %2 d%3  -1,  4
 times %2 d%3 -10, 58
 times %2 d%3  17, -5
 times %2 d%3   1,  0
@@ -67,10 +67,10 @@ hevc_qpel_filters_%4_%1 times %2 d%3  -1,  4
 times %2 d%3   4, -1
 %endmacro
 
-QPEL_TABLE  8, 8, b, sse4
-QPEL_TABLE 10, 4, w, sse4
+QPEL_TABLE  8, 8, b
+QPEL_TABLE 10, 4, w
 
-%define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10
+%define hevc_qpel_filters_14 hevc_qpel_filters_10
 
 %if ARCH_X86_64
 
@@ -114,9 +114,9 @@ QPEL_TABLE 10, 4, w, sse4
 
 %macro EPEL_FILTER 2-4; bit depth, filter index
 %ifdef PIC
-lea rfilterq, [hevc_epel_filters_sse4_%1]
+lea rfilterq, [hevc_epel_filters_%1]
 %else
-%define rfilterq hevc_epel_filters_sse4_%1
+%define rfilterq hevc_epel_filters_%1
 %endif
 sub  %2q, 1
 shl  %2q, 5  ; multiply by 32
@@ -131,9 +131,9 @@ QPEL_TABLE 10, 4, w, sse4
 
 %macro EPEL_HV_FILTER 1
 %ifdef PIC
-lea rfilterq, [hevc_epel_filters_sse4_%1]
+lea rfilterq, [hevc_epel_filters_%1]
 %else
-%define rfilterq hevc_epel_filters_sse4_%1
+%define rfilterq hevc_epel_filters_%1
 %endif
 sub  mxq, 1
 sub  myq, 1
@@ -144,9 +144,9 @@ QPEL_TABLE 10, 4, w, sse4
 lea   r3srcq, [srcstrideq*3]
 
 %ifdef PIC
-lea rfilterq, [hevc_epel_filters_sse4_10]
+lea rfilterq, [hevc_epel_filters_10]
 %else
-%define rfilterq hevc_epel_filters_sse4_10
+%define rfilterq hevc_epel_filters_10
 %endif
 movdqa   m12, [rfilterq + myq]; get 2 first values of 
filters
 movdqa   m13, [rfilterq + myq+16] ; get 2 last values of 
filters
@@ -154,9 +154,9 @@ QPEL_TABLE 10, 4, w, sse4
 
 %macro QPEL_FILTER 2
 %ifdef PIC
-lea rfilterq, [hevc_qpel_filters_sse4_%1]
+lea rfilterq, [hevc_qpel_filters_%1]
 %else
-%define rfilterq hevc_qpel_filters_sse4_%1
+%define rfilterq hevc_qpel_filters_%1
 %endif
 lea  %2q, [%2q*8-8]
 movdqa   m12, [rfilterq + %2q*8]   ; get 4 first values of 
filters
@@ -389,9 +389,9 @@ QPEL_TABLE 10, 4, w, sse4
 
 %macro QPEL_HV_COMPUTE 4 ; width, bitdepth, filter idx
 %ifdef PIC
-lea rfilterq, [hevc_qpel_filters_sse4_%2]
+lea rfilterq, [hevc_qpel_filters_%2]
 %else
-%define rfilterq hevc_qpel_filters_sse4_%2
+%define rfilterq hevc_qpel_filters_%2
 %endif
 
 %if %2 == 8
@@ -498,7 +498,6 @@ QPEL_TABLE 10, 4, w, sse4
 %endif
 %endmacro
 
-INIT_XMM sse4; adds ff_ and _sse4 to 
function name
 ; **
 ; void put_hevc_mc_pixels(int16_t *dst, ptrdiff_t dststride,
 ; uint8_t *_src, ptrdiff_t _srcstride,
@@ -514,7 +513,9 @@ cglobal hevc_put_hevc_pel_pixels%1_%2, 5, 5, 3, dst, 
dststride, src, srcstride,h
 PEL_10STORE%1 dstq, m0, m1
 LOOP_END dst, dststride, src, srcstride
 RET
+%endmacro
 
+%macro HEVC_PUT_HEVC_UNI_PEL_PIXELS 2
 cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 3, dst, dststride, src, 
srcstride,height
 pxor  m2, m2
 .loop
@@ -525,7 +526,9 @@ cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 3, dst, 
dststride, src, srcstri
 dec  heightd ; cmp height
 jnz   .loop  ; height loop
 RET
+%endmacro
 
+%macro HEVC_PUT_HEVC_BI_PEL_PIXELS 2
 cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 7, 7, 6, dst, dststride, src, 
srcstride, src2, src2stride,height
 pxor  m2, m2
 movdqam5, [pw_bi_%2]
@@ -541,9 +544,44 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 7, 7, 6, dst,

Re: [libav-devel] [PATCH] mov: Mark a variable as unused

2014-07-07 Thread James Almer

On 07/07/14 4:08 PM, Martin Storsjö wrote:
 This silences a warning with gcc.
 ---
 In my defense, clang didn't show this warning.
 ---
  libavformat/mov.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/libavformat/mov.c b/libavformat/mov.c
 index 4a2d265..853c3e5 100644
 --- a/libavformat/mov.c
 +++ b/libavformat/mov.c
 @@ -213,7 +213,7 @@ static int mov_metadata_loci(MOVContext *c, AVIOContext 
 *pb, unsigned len)
  char language[4] = { 0 };
  char buf[100];
  uint16_t langcode = 0;
 -double longitude, latitude, altitude;
 +double longitude, latitude, av_unused(altitude);
  const char *key = location;
  
  if (len  4 + 2 + 1 + 1 + 4 + 4 + 4)

Why not just remove the variable altogether and do an avio_rb32() or avio_skip()
after latitude? Assuming avio_rb32() is needed at all to advance the pb pointer,
that is.

It would remove an unnecessary division.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 5/5] build: Add define for SIMD extensions requiring 16-byte aligned buffers

2014-07-21 Thread James Almer

On 21/07/14 6:21 PM, Diego Biurrun wrote:
 ---
 
 I'm slightly unsure about this one.  MMX does not require 16-byte aligned
 buffers, nor does PowerPC IIRC, but SSE and AltiVec do, so I believe my
 solution is closer to the original intention.  Please do correct me if I
 am wrong...
 
  configure  | 2 ++
  libavcodec/utils.c | 2 +-
  2 files changed, 3 insertions(+), 1 deletion(-)
 

[...]

 diff --git a/libavcodec/utils.c b/libavcodec/utils.c
 index 2abc376..16c30c3 100644
 --- a/libavcodec/utils.c
 +++ b/libavcodec/utils.c
 @@ -189,7 +189,7 @@ int ff_side_data_update_matrix_encoding(AVFrame *frame,
  return 0;
  }
  
 -#if HAVE_NEON || ARCH_PPC || HAVE_MMX
 +#if HAVE_SIMD_ALIGN_16
  #   define STRIDE_ALIGN 16
  #else
  #   define STRIDE_ALIGN 8

#if HAVE_NEON || HAVE_ALTIVEC || HAVE_SSE
#   define STRIDE_ALIGN 16
#else
#   define STRIDE_ALIGN 8
#endif

is simpler than adding another HAVE_ define for this single use.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/3] mem: add av_strndup() for duplicating substrings

2014-08-12 Thread James Almer

On 12/08/14 1:54 PM, Anton Khirnov wrote:
 ---
  doc/APIchanges  |  3 +++
  libavutil/mem.c | 20 
  libavutil/mem.h | 10 ++
  libavutil/version.h |  2 +-
  4 files changed, 34 insertions(+), 1 deletion(-)
 

Shouldn't this be in avstring.h/c?
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 07/17] dxva2: Undefine _WIN32_WINNT before defining it

2014-08-17 Thread James Almer

On 15/08/14 5:13 PM, Diego Biurrun wrote:
 This avoids a number of redefinition warnings.
 ---
  libavcodec/dxva2_internal.h | 1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
 index f35a076..d50c0ff 100644
 --- a/libavcodec/dxva2_internal.h
 +++ b/libavcodec/dxva2_internal.h
 @@ -23,6 +23,7 @@
  #ifndef AVCODEC_DXVA_INTERNAL_H
  #define AVCODEC_DXVA_INTERNAL_H
  
 +#undef _WIN32_WINNT
  #define _WIN32_WINNT 0x0600
  #define COBJMACROS

_WIN32_WINNT may already be defined with a value higher than 0x0600 (for example
when targeting Win7 or Win8), and this would be forcing it to a lower value.
In practice, and as far as libavcodec's DXVA2 support goes, there's probably no
difference, but the more correct thing to do would be to check if it's already
defined, and then only redefine it if it's < 0x0600.

For that matter, dxva2_internal.h includes dxva2.h, a header that also tries to
define _WIN32_WINNT. It would be best to have all this only in dxva2.h to reduce
code duplication.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 5/6] lavc: Add private API to manipulate AVPacketList

2014-08-31 Thread James Almer

On 31/08/14 4:24 PM, Luca Barbato wrote:
 ---
  libavcodec/avcodec.h   |  5 +
  libavcodec/avpacket.c  | 56 
 ++
  libavcodec/internal.h  | 36 
  libavformat/avformat.h |  6 --
  4 files changed, 97 insertions(+), 6 deletions(-)
 
 diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
 index 270c6c8..116496f 100644
 --- a/libavcodec/avcodec.h
 +++ b/libavcodec/avcodec.h
 @@ -1019,6 +1019,11 @@ typedef struct AVPacket {
  #define AV_PKT_FLAG_KEY 0x0001 /// The packet contains a keyframe
  #define AV_PKT_FLAG_CORRUPT 0x0002 /// The packet content is corrupted
  
 +typedef struct AVPacketList {
 +AVPacket pkt;
 +struct AVPacketList *next;
 +} AVPacketList;
 +
  enum AVSideDataParamChangeFlags {
  AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT  = 0x0001,
  AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT = 0x0002,
 diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
 index 25eabdb..bea12df 100644
 --- a/libavcodec/avpacket.c
 +++ b/libavcodec/avpacket.c
 @@ -26,6 +26,7 @@
  #include libavutil/internal.h
  #include libavutil/mathematics.h
  #include libavutil/mem.h
 +#include internal.h
  #include avcodec.h
  #if FF_API_DESTRUCT_PACKET
  
 @@ -393,3 +394,58 @@ void av_packet_rescale_ts(AVPacket *pkt, AVRational 
 src_tb, AVRational dst_tb)
  if (pkt-convergence_duration  0)
  pkt-convergence_duration = av_rescale_q(pkt-convergence_duration, 
 src_tb, dst_tb);
  }
 +
 +int ff_packet_list_put(AVPacketList **head, AVPacketList **tail,
 +   AVPacket *pkt)

avpriv_? (in all three functions).
lavf can make good use of all this, and I think your original patchset did as
much.

Thanks for resurrecting this for that matter.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] jpeg2000: split off inverse MCT decoding as Jpeg2000DSP

2014-10-01 Thread James Almer

This makes the addition of arch optimized functions easier.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/Makefile  |  2 +-
 libavcodec/jpeg2000dec.c | 72 +--
 libavcodec/jpeg2000dsp.c | 98 
 libavcodec/jpeg2000dsp.h | 35 +
 libavcodec/jpeg2000dwt.h |  3 +-
 5 files changed, 154 insertions(+), 56 deletions(-)
 create mode 100644 libavcodec/jpeg2000dsp.c
 create mode 100644 libavcodec/jpeg2000dsp.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 178b61e..69b92b6 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -229,7 +229,7 @@ OBJS-$(CONFIG_INDEO4_DECODER)  += indeo4.o 
ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INDEO5_DECODER)  += indeo5.o ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER)  += dpcm.o
 OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o
-OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o  \
+OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o 
jpeg2000dsp.o \
   jpeg2000dwt.o mqcdec.o mqc.o
 OBJS-$(CONFIG_JPEGLS_DECODER)  += jpeglsdec.o jpegls.o
 OBJS-$(CONFIG_JPEGLS_ENCODER)  += jpeglsenc.o jpegls.o
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index aed9b2b..5135297 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -35,6 +35,7 @@
 #include internal.h
 #include thread.h
 #include jpeg2000.h
+#include jpeg2000dsp.h
 
 #define JP2_SIG_TYPE0x6A502020
 #define JP2_SIG_VALUE   0x0D0A870A
@@ -85,6 +86,7 @@ typedef struct Jpeg2000DecoderContext {
 
 int16_t curtileno;
 Jpeg2000Tile*tile;
+Jpeg2000DSPContext dsp;
 
 /*options parameters*/
 int reduction_factor;
@@ -1041,69 +1043,21 @@ static void dequantization_int(int x, int y, 
Jpeg2000Cblk *cblk,
 }
 }
 
-/* Inverse ICT parameters in float and integer.
- * int value = (float value) * (116) */
-static const float f_ict_params[4] = {
-1.402f,
-0.34413f,
-0.71414f,
-1.772f
-};
-static const int   i_ict_params[4] = {
- 91881,
- 22553,
- 46802,
-116130
-};
-
-static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
+static inline void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
 {
 int i, csize = 1;
-int32_t *src[3],  i0,  i1,  i2;
-float   *srcf[3], i0f, i1f, i2f;
+void *src[3];
 
 for (i = 0; i  3; i++)
 if (tile-codsty[0].transform == FF_DWT97)
-srcf[i] = tile-comp[i].f_data;
+src[i] = tile-comp[i].f_data;
 else
-src [i] = tile-comp[i].i_data;
+src[i] = tile-comp[i].i_data;
 
 for (i = 0; i  2; i++)
 csize *= tile-comp[0].coord[i][1] - tile-comp[0].coord[i][0];
-switch (tile-codsty[0].transform) {
-case FF_DWT97:
-for (i = 0; i  csize; i++) {
-i0f = *srcf[0] + (f_ict_params[0] * *srcf[2]);
-i1f = *srcf[0] - (f_ict_params[1] * *srcf[1])
-   - (f_ict_params[2] * *srcf[2]);
-i2f = *srcf[0] + (f_ict_params[3] * *srcf[1]);
-*srcf[0]++ = i0f;
-*srcf[1]++ = i1f;
-*srcf[2]++ = i2f;
-}
-break;
-case FF_DWT97_INT:
-for (i = 0; i  csize; i++) {
-i0 = *src[0] + (((i_ict_params[0] * *src[2]) + (1  15))  16);
-i1 = *src[0] - (((i_ict_params[1] * *src[1]) + (1  15))  16)
- - (((i_ict_params[2] * *src[2]) + (1  15))  16);
-i2 = *src[0] + (((i_ict_params[3] * *src[1]) + (1  15))  16);
-*src[0]++ = i0;
-*src[1]++ = i1;
-*src[2]++ = i2;
-}
-break;
-case FF_DWT53:
-for (i = 0; i  csize; i++) {
-i1 = *src[0] - (*src[2] + *src[1]  2);
-i0 = i1 + *src[2];
-i2 = i1 + *src[1];
-*src[0]++ = i0;
-*src[1]++ = i1;
-*src[2]++ = i2;
-}
-break;
-}
+
+s-dsp.mct_decode[tile-codsty[0].transform](src[0], src[1], src[2], 
csize);
 }
 
 static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
@@ -1406,6 +1360,15 @@ static int jp2_find_codestream(Jpeg2000DecoderContext *s)
 return 0;
 }
 
+static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
+{
+Jpeg2000DecoderContext *s = avctx-priv_data;
+
+ff_jpeg2000dsp_init(s-dsp);
+
+return 0;
+}
+
 static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
  int *got_frame, AVPacket *avpkt)
 {
@@ -1510,6 +1473,7 @@ AVCodec ff_jpeg2000_decoder = {
 .capabilities = CODEC_CAP_FRAME_THREADS,
 .priv_data_size   = sizeof(Jpeg2000DecoderContext),
 .init_static_data = jpeg2000_init_static_data,
+.init = jpeg2000_decode_init,
 .decode   = jpeg2000_decode_frame

[libav-devel] [PATCH] jpeg2000: split off inverse MCT as Jpeg2000DSP

2014-10-02 Thread James Almer

This makes the addition of arch optimized functions easier.

Signed-off-by: James Almer jamr...@gmail.com
---
Now with proper names for the dsp functions, Irreversible MCT (ICT) and 
Reversible MCT (RCT) respectively.
No other changes.

 libavcodec/Makefile  |  2 +-
 libavcodec/jpeg2000dec.c | 72 +--
 libavcodec/jpeg2000dsp.c | 98 
 libavcodec/jpeg2000dsp.h | 35 +
 libavcodec/jpeg2000dwt.h |  3 +-
 5 files changed, 154 insertions(+), 56 deletions(-)
 create mode 100644 libavcodec/jpeg2000dsp.c
 create mode 100644 libavcodec/jpeg2000dsp.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 178b61e..69b92b6 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -229,7 +229,7 @@ OBJS-$(CONFIG_INDEO4_DECODER)  += indeo4.o 
ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INDEO5_DECODER)  += indeo5.o ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER)  += dpcm.o
 OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o
-OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o  \
+OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o 
jpeg2000dsp.o \
   jpeg2000dwt.o mqcdec.o mqc.o
 OBJS-$(CONFIG_JPEGLS_DECODER)  += jpeglsdec.o jpegls.o
 OBJS-$(CONFIG_JPEGLS_ENCODER)  += jpeglsenc.o jpegls.o
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index aed9b2b..5135297 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -35,6 +35,7 @@
 #include internal.h
 #include thread.h
 #include jpeg2000.h
+#include jpeg2000dsp.h
 
 #define JP2_SIG_TYPE0x6A502020
 #define JP2_SIG_VALUE   0x0D0A870A
@@ -85,6 +86,7 @@ typedef struct Jpeg2000DecoderContext {
 
 int16_t curtileno;
 Jpeg2000Tile*tile;
+Jpeg2000DSPContext dsp;
 
 /*options parameters*/
 int reduction_factor;
@@ -1041,69 +1043,21 @@ static void dequantization_int(int x, int y, 
Jpeg2000Cblk *cblk,
 }
 }
 
-/* Inverse ICT parameters in float and integer.
- * int value = (float value) * (116) */
-static const float f_ict_params[4] = {
-1.402f,
-0.34413f,
-0.71414f,
-1.772f
-};
-static const int   i_ict_params[4] = {
- 91881,
- 22553,
- 46802,
-116130
-};
-
-static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
+static inline void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
 {
 int i, csize = 1;
-int32_t *src[3],  i0,  i1,  i2;
-float   *srcf[3], i0f, i1f, i2f;
+void *src[3];
 
 for (i = 0; i  3; i++)
 if (tile-codsty[0].transform == FF_DWT97)
-srcf[i] = tile-comp[i].f_data;
+src[i] = tile-comp[i].f_data;
 else
-src [i] = tile-comp[i].i_data;
+src[i] = tile-comp[i].i_data;
 
 for (i = 0; i  2; i++)
 csize *= tile-comp[0].coord[i][1] - tile-comp[0].coord[i][0];
-switch (tile-codsty[0].transform) {
-case FF_DWT97:
-for (i = 0; i  csize; i++) {
-i0f = *srcf[0] + (f_ict_params[0] * *srcf[2]);
-i1f = *srcf[0] - (f_ict_params[1] * *srcf[1])
-   - (f_ict_params[2] * *srcf[2]);
-i2f = *srcf[0] + (f_ict_params[3] * *srcf[1]);
-*srcf[0]++ = i0f;
-*srcf[1]++ = i1f;
-*srcf[2]++ = i2f;
-}
-break;
-case FF_DWT97_INT:
-for (i = 0; i  csize; i++) {
-i0 = *src[0] + (((i_ict_params[0] * *src[2]) + (1  15))  16);
-i1 = *src[0] - (((i_ict_params[1] * *src[1]) + (1  15))  16)
- - (((i_ict_params[2] * *src[2]) + (1  15))  16);
-i2 = *src[0] + (((i_ict_params[3] * *src[1]) + (1  15))  16);
-*src[0]++ = i0;
-*src[1]++ = i1;
-*src[2]++ = i2;
-}
-break;
-case FF_DWT53:
-for (i = 0; i  csize; i++) {
-i1 = *src[0] - (*src[2] + *src[1]  2);
-i0 = i1 + *src[2];
-i2 = i1 + *src[1];
-*src[0]++ = i0;
-*src[1]++ = i1;
-*src[2]++ = i2;
-}
-break;
-}
+
+s-dsp.mct_decode[tile-codsty[0].transform](src[0], src[1], src[2], 
csize);
 }
 
 static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
@@ -1406,6 +1360,15 @@ static int jp2_find_codestream(Jpeg2000DecoderContext *s)
 return 0;
 }
 
+static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
+{
+Jpeg2000DecoderContext *s = avctx-priv_data;
+
+ff_jpeg2000dsp_init(s-dsp);
+
+return 0;
+}
+
 static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
  int *got_frame, AVPacket *avpkt)
 {
@@ -1510,6 +1473,7 @@ AVCodec ff_jpeg2000_decoder = {
 .capabilities = CODEC_CAP_FRAME_THREADS,
 .priv_data_size   = sizeof(Jpeg2000DecoderContext),
 .init_static_data

[libav-devel] [PATCH 1/2] compat/w32pthreads: use the CONDITION_VARIABLE typedef if available

2014-10-08 Thread James Almer

This silences warnings about passing arguments from incompatible pointer type
when targeting Windows Vista or newer.

Signed-off-by: James Almer jamr...@gmail.com
---
Only tested with Mingw-w64 v3.
Can someone test MSVC or ICL? I know the former defines the struct, so the 
configure check should succeed on that one at least.

 compat/w32pthreads.h | 23 +--
 configure|  2 ++
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index 2a7f323..2642661 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -55,12 +55,15 @@ typedef struct pthread_t {
  * not mutexes */
 typedef CRITICAL_SECTION pthread_mutex_t;
 
-/* This is the CONDITIONAL_VARIABLE typedef for using Window's native
- * conditional variables on kernels 6.0+.
- * MinGW does not currently have this typedef. */
+/* This is the CONDITION_VARIABLE typedef for using Window's native
+ * conditional variables on kernels 6.0+. */
+#if HAVE_CONDITION_VARIABLE_PTR
+typedef CONDITION_VARIABLE pthread_cond_t;
+#else
 typedef struct pthread_cond_t {
-void *ptr;
+void *Ptr;
 } pthread_cond_t;
+#endif
 
 /* function pointers to conditional variable API on windows 6.0+ kernels */
 #if _WIN32_WINNT  0x0600
@@ -159,7 +162,7 @@ static av_unused int pthread_cond_init(pthread_cond_t 
*cond, const void *unused_
 win32_cond = av_mallocz(sizeof(win32_cond_t));
 if (!win32_cond)
 return ENOMEM;
-cond-ptr = win32_cond;
+cond-Ptr = win32_cond;
 win32_cond-semaphore = CreateSemaphore(NULL, 0, 0x7fff, NULL);
 if (!win32_cond-semaphore)
 return ENOMEM;
@@ -174,7 +177,7 @@ static av_unused int pthread_cond_init(pthread_cond_t 
*cond, const void *unused_
 
 static av_unused void pthread_cond_destroy(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 /* native condition variables do not destroy */
 if (cond_init)
 return;
@@ -185,12 +188,12 @@ static av_unused void pthread_cond_destroy(pthread_cond_t 
*cond)
 pthread_mutex_destroy(win32_cond-mtx_waiter_count);
 pthread_mutex_destroy(win32_cond-mtx_broadcast);
 av_freep(win32_cond);
-cond-ptr = NULL;
+cond-Ptr = NULL;
 }
 
 static av_unused void pthread_cond_broadcast(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int have_waiter;
 
 if (cond_broadcast) {
@@ -221,7 +224,7 @@ static av_unused void pthread_cond_broadcast(pthread_cond_t 
*cond)
 
 static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int last_waiter;
 if (cond_wait) {
 cond_wait(cond, mutex, INFINITE);
@@ -253,7 +256,7 @@ static av_unused int pthread_cond_wait(pthread_cond_t 
*cond, pthread_mutex_t *mu
 
 static av_unused void pthread_cond_signal(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int have_waiter;
 if (cond_signal) {
 cond_signal(cond);
diff --git a/configure b/configure
index 06a2d4e..cda463d 100755
--- a/configure
+++ b/configure
@@ -1767,6 +1767,7 @@ TOOLCHAIN_FEATURES=
 
 
 TYPES_LIST=
+CONDITION_VARIABLE_Ptr
 socklen_t
 struct_addrinfo
 struct_group_source_req
@@ -4719,6 +4720,7 @@ check_func_headers windows.h PeekNamedPipe
 check_func_headers windows.h SetConsoleTextAttribute
 check_func_headers windows.h Sleep
 check_func_headers windows.h VirtualAlloc
+check_struct windows.h CONDITION_VARIABLE Ptr
 check_func_headers glob.h glob
 enabled xlib 
 check_func_headers X11/Xlib.h X11/extensions/Xvlib.h XvGetPortAttribute 
-lXv -lX11 -lXext
-- 
2.0.4


___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows

2014-10-08 Thread James Almer

Wrap the function calls in a similar fashion to how it's being done
with the critical section API.

Signed-off-by: James Almer jamr...@gmail.com
---
 compat/w32pthreads.h | 64 +++-
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index 2642661..693ef51 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -65,32 +65,6 @@ typedef struct pthread_cond_t {
 } pthread_cond_t;
 #endif
 
-/* function pointers to conditional variable API on windows 6.0+ kernels */
-#if _WIN32_WINNT  0x0600
-static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
-static void (WINAPI *cond_init)(pthread_cond_t *cond);
-static void (WINAPI *cond_signal)(pthread_cond_t *cond);
-static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
-DWORD milliseconds);
-#else
-#define cond_init  InitializeConditionVariable
-#define cond_broadcast WakeAllConditionVariable
-#define cond_signalWakeConditionVariable
-#define cond_wait  SleepConditionVariableCS
-
-#define CreateEvent(a, reset, init, name)   \
-CreateEventEx(a, name,  \
-  (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \
-  (init ? CREATE_EVENT_INITIAL_SET : 0),\
-  EVENT_ALL_ACCESS)
-// CreateSemaphoreExA seems to be desktop-only, but as long as we don't
-// use named semaphores, it doesn't matter if we use the W version.
-#define CreateSemaphore(a, b, c, d) \
-CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS)
-#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
-#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
-#endif
-
 static av_unused unsigned __stdcall attribute_align_arg 
win32thread_worker(void *arg)
 {
 pthread_t *h = arg;
@@ -139,6 +113,36 @@ static inline int pthread_mutex_unlock(pthread_mutex_t *m)
 return 0;
 }
 
+#if _WIN32_WINNT = 0x0600
+static inline int pthread_cond_init(pthread_cond_t *cond, const void 
*unused_attr)
+{
+InitializeConditionVariable(cond);
+return 0;
+}
+
+/* native condition variables do not destroy */
+static inline void pthread_cond_destroy(pthread_cond_t *cond)
+{
+return;
+}
+
+static inline void pthread_cond_broadcast(pthread_cond_t *cond)
+{
+WakeAllConditionVariable(cond);
+}
+
+static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex)
+{
+SleepConditionVariableCS(cond, mutex, INFINITE);
+return 0;
+}
+
+static inline void pthread_cond_signal(pthread_cond_t *cond)
+{
+WakeConditionVariable(cond);
+}
+
+#else // _WIN32_WINNT  0x0600
 /* for pre-Windows 6.0 platforms we need to define and use our own condition
  * variable and api */
 typedef struct  win32_cond_t {
@@ -150,6 +154,13 @@ typedef struct  win32_cond_t {
 volatile int is_broadcast;
 } win32_cond_t;
 
+/* function pointers to conditional variable API on windows 6.0+ kernels */
+static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
+static void (WINAPI *cond_init)(pthread_cond_t *cond);
+static void (WINAPI *cond_signal)(pthread_cond_t *cond);
+static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
+DWORD milliseconds);
+
 static av_unused int pthread_cond_init(pthread_cond_t *cond, const void 
*unused_attr)
 {
 win32_cond_t *win32_cond = NULL;
@@ -278,6 +289,7 @@ static av_unused void pthread_cond_signal(pthread_cond_t 
*cond)
 
 pthread_mutex_unlock(win32_cond-mtx_broadcast);
 }
+#endif
 
 static av_unused void w32thread_init(void)
 {
-- 
2.0.4


___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] compat/w32pthreads: use the CONDITION_VARIABLE typedef if available

2014-10-08 Thread James Almer

This silences warnings about passing arguments from incompatible pointer type
when targeting Windows Vista or newer.

Signed-off-by: James Almer jamr...@gmail.com
---
Sent the wrong version earlier, my bad.
The request for testing I made before still stands.

 compat/w32pthreads.h | 23 +--
 configure|  2 ++
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index d0b90e8..b905a95 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -54,12 +54,15 @@ typedef struct pthread_t {
  * not mutexes */
 typedef CRITICAL_SECTION pthread_mutex_t;
 
-/* This is the CONDITIONAL_VARIABLE typedef for using Window's native
- * conditional variables on kernels 6.0+.
- * MinGW does not currently have this typedef. */
+/* This is the CONDITION_VARIABLE typedef for using Window's native
+ * conditional variables on kernels 6.0+. */
+#if HAVE_CONDITION_VARIABLE_PTR
+typedef CONDITION_VARIABLE pthread_cond_t;
+#else
 typedef struct pthread_cond_t {
-void *ptr;
+void *Ptr;
 } pthread_cond_t;
+#endif
 
 /* function pointers to conditional variable API on windows 6.0+ kernels */
 #if _WIN32_WINNT  0x0600
@@ -158,7 +161,7 @@ static av_unused void pthread_cond_init(pthread_cond_t 
*cond, const void *unused
 win32_cond = av_mallocz(sizeof(win32_cond_t));
 if (!win32_cond)
 return;
-cond-ptr = win32_cond;
+cond-Ptr = win32_cond;
 win32_cond-semaphore = CreateSemaphore(NULL, 0, 0x7fff, NULL);
 if (!win32_cond-semaphore)
 return;
@@ -172,7 +175,7 @@ static av_unused void pthread_cond_init(pthread_cond_t 
*cond, const void *unused
 
 static av_unused void pthread_cond_destroy(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 /* native condition variables do not destroy */
 if (cond_init)
 return;
@@ -183,12 +186,12 @@ static av_unused void pthread_cond_destroy(pthread_cond_t 
*cond)
 pthread_mutex_destroy(win32_cond-mtx_waiter_count);
 pthread_mutex_destroy(win32_cond-mtx_broadcast);
 av_freep(win32_cond);
-cond-ptr = NULL;
+cond-Ptr = NULL;
 }
 
 static av_unused void pthread_cond_broadcast(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int have_waiter;
 
 if (cond_broadcast) {
@@ -219,7 +222,7 @@ static av_unused void pthread_cond_broadcast(pthread_cond_t 
*cond)
 
 static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int last_waiter;
 if (cond_wait) {
 cond_wait(cond, mutex, INFINITE);
@@ -251,7 +254,7 @@ static av_unused int pthread_cond_wait(pthread_cond_t 
*cond, pthread_mutex_t *mu
 
 static av_unused void pthread_cond_signal(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int have_waiter;
 if (cond_signal) {
 cond_signal(cond);
diff --git a/configure b/configure
index d87871e..a82bef7 100755
--- a/configure
+++ b/configure
@@ -1498,6 +1498,7 @@ TOOLCHAIN_FEATURES=
 
 
 TYPES_LIST=
+CONDITION_VARIABLE_Ptr
 socklen_t
 struct_addrinfo
 struct_group_source_req
@@ -4088,6 +4089,7 @@ check_func_headers windows.h MapViewOfFile
 check_func_headers windows.h SetConsoleTextAttribute
 check_func_headers windows.h Sleep
 check_func_headers windows.h VirtualAlloc
+check_struct windows.h CONDITION_VARIABLE Ptr
 
 check_header direct.h
 check_header dlfcn.h
-- 
2.0.4


___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows

2014-10-08 Thread James Almer

Wrap the function calls in a similar fashion to how it's being done
with the critical section API.

Signed-off-by: James Almer jamr...@gmail.com
---
 compat/w32pthreads.h | 63 ++--
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index b905a95..e586ecb 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -64,32 +64,6 @@ typedef struct pthread_cond_t {
 } pthread_cond_t;
 #endif
 
-/* function pointers to conditional variable API on windows 6.0+ kernels */
-#if _WIN32_WINNT  0x0600
-static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
-static void (WINAPI *cond_init)(pthread_cond_t *cond);
-static void (WINAPI *cond_signal)(pthread_cond_t *cond);
-static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
-DWORD milliseconds);
-#else
-#define cond_init  InitializeConditionVariable
-#define cond_broadcast WakeAllConditionVariable
-#define cond_signalWakeConditionVariable
-#define cond_wait  SleepConditionVariableCS
-
-#define CreateEvent(a, reset, init, name)   \
-CreateEventEx(a, name,  \
-  (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \
-  (init ? CREATE_EVENT_INITIAL_SET : 0),\
-  EVENT_ALL_ACCESS)
-// CreateSemaphoreExA seems to be desktop-only, but as long as we don't
-// use named semaphores, it doesn't matter if we use the W version.
-#define CreateSemaphore(a, b, c, d) \
-CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS)
-#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
-#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
-#endif
-
 static av_unused unsigned __stdcall attribute_align_arg 
win32thread_worker(void *arg)
 {
 pthread_t *h = arg;
@@ -138,6 +112,35 @@ static inline int pthread_mutex_unlock(pthread_mutex_t *m)
 return 0;
 }
 
+#if _WIN32_WINNT = 0x0600
+static inline void pthread_cond_init(pthread_cond_t *cond, const void 
*unused_attr)
+{
+InitializeConditionVariable(cond);
+}
+
+/* native condition variables do not destroy */
+static inline void pthread_cond_destroy(pthread_cond_t *cond)
+{
+return;
+}
+
+static inline void pthread_cond_broadcast(pthread_cond_t *cond)
+{
+WakeAllConditionVariable(cond);
+}
+
+static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex)
+{
+SleepConditionVariableCS(cond, mutex, INFINITE);
+return 0;
+}
+
+static inline void pthread_cond_signal(pthread_cond_t *cond)
+{
+WakeConditionVariable(cond);
+}
+
+#else // _WIN32_WINNT  0x0600
 /* for pre-Windows 6.0 platforms we need to define and use our own condition
  * variable and api */
 typedef struct  win32_cond_t {
@@ -149,6 +152,13 @@ typedef struct  win32_cond_t {
 volatile int is_broadcast;
 } win32_cond_t;
 
+/* function pointers to conditional variable API on windows 6.0+ kernels */
+static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
+static void (WINAPI *cond_init)(pthread_cond_t *cond);
+static void (WINAPI *cond_signal)(pthread_cond_t *cond);
+static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
+DWORD milliseconds);
+
 static av_unused void pthread_cond_init(pthread_cond_t *cond, const void 
*unused_attr)
 {
 win32_cond_t *win32_cond = NULL;
@@ -276,6 +286,7 @@ static av_unused void pthread_cond_signal(pthread_cond_t 
*cond)
 
 pthread_mutex_unlock(win32_cond-mtx_broadcast);
 }
+#endif
 
 static av_unused void w32thread_init(void)
 {
-- 
2.0.4


___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows

2014-10-09 Thread James Almer

On 09/10/14 5:03 AM, Martin Storsjö wrote:
 On Wed, 8 Oct 2014, James Almer wrote:
 
 Wrap the function calls in a similar fashion to how it's being done
 with the critical section API.

 Signed-off-by: James Almer jamr...@gmail.com
 ---
 compat/w32pthreads.h | 63 
 ++--
 1 file changed, 37 insertions(+), 26 deletions(-)

 diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
 index b905a95..e586ecb 100644
 --- a/compat/w32pthreads.h
 +++ b/compat/w32pthreads.h
 @@ -64,32 +64,6 @@ typedef struct pthread_cond_t {
 } pthread_cond_t;
 #endif

 -/* function pointers to conditional variable API on windows 6.0+ kernels */
 -#if _WIN32_WINNT  0x0600
 -static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
 -static void (WINAPI *cond_init)(pthread_cond_t *cond);
 -static void (WINAPI *cond_signal)(pthread_cond_t *cond);
 -static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t 
 *mutex,
 -DWORD milliseconds);
 -#else
 -#define cond_init  InitializeConditionVariable
 -#define cond_broadcast WakeAllConditionVariable
 -#define cond_signalWakeConditionVariable
 -#define cond_wait  SleepConditionVariableCS
 -
 -#define CreateEvent(a, reset, init, name)   \
 -CreateEventEx(a, name,  \
 -  (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \
 -  (init ? CREATE_EVENT_INITIAL_SET : 0),\
 -  EVENT_ALL_ACCESS)
 -// CreateSemaphoreExA seems to be desktop-only, but as long as we don't
 -// use named semaphores, it doesn't matter if we use the W version.
 -#define CreateSemaphore(a, b, c, d) \
 -CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS)
 -#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
 -#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
 -#endif
 -
 
 Where did the 
 CreateEvent/CreateSemaphore/InitializeCriticalSection/WaitForSingleObject 
 definitions go here? When targeting desktop windows they don't matter (since 
 the old functions still exist), but when targeting WinRT/WinPhone, the old 
 functions are no longer available.

You're right about InitializeCriticalSection and WaitForSingleObject (I somehow
missed those), but the redefinitions of CreateEvent and CreateSemaphore are not
needed anymore since they will now be used only for the non-native version of
the condition variable API, which is only compiled when _WIN32_WINNT < 0x0600.

I'll send a patch to put the former two back in place.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] compat/w32pthreads: use the condition variable API directly when targeting newer versions of Windows

2014-10-09 Thread James Almer

Wrap the function calls in a similar fashion to how it's being done
with the critical section API.

Signed-off-by: James Almer jamr...@gmail.com
---
 compat/w32pthreads.h | 60 +---
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index f8eb0c8..3748289 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -64,28 +64,7 @@ typedef struct pthread_cond_t {
 } pthread_cond_t;
 #endif
 
-/* function pointers to conditional variable API on windows 6.0+ kernels */
-#if _WIN32_WINNT  0x0600
-static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
-static void (WINAPI *cond_init)(pthread_cond_t *cond);
-static void (WINAPI *cond_signal)(pthread_cond_t *cond);
-static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
-DWORD milliseconds);
-#else
-#define cond_init  InitializeConditionVariable
-#define cond_broadcast WakeAllConditionVariable
-#define cond_signalWakeConditionVariable
-#define cond_wait  SleepConditionVariableCS
-
-#define CreateEvent(a, reset, init, name)   \
-CreateEventEx(a, name,  \
-  (reset ? CREATE_EVENT_MANUAL_RESET : 0) | \
-  (init ? CREATE_EVENT_INITIAL_SET : 0),\
-  EVENT_ALL_ACCESS)
-// CreateSemaphoreExA seems to be desktop-only, but as long as we don't
-// use named semaphores, it doesn't matter if we use the W version.
-#define CreateSemaphore(a, b, c, d) \
-CreateSemaphoreExW(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS)
+#if _WIN32_WINNT = 0x0600
 #define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
 #define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
 #endif
@@ -138,6 +117,35 @@ static inline int pthread_mutex_unlock(pthread_mutex_t *m)
 return 0;
 }
 
+#if _WIN32_WINNT = 0x0600
+static inline void pthread_cond_init(pthread_cond_t *cond, const void 
*unused_attr)
+{
+InitializeConditionVariable(cond);
+}
+
+/* native condition variables do not destroy */
+static inline void pthread_cond_destroy(pthread_cond_t *cond)
+{
+return;
+}
+
+static inline void pthread_cond_broadcast(pthread_cond_t *cond)
+{
+WakeAllConditionVariable(cond);
+}
+
+static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex)
+{
+SleepConditionVariableCS(cond, mutex, INFINITE);
+return 0;
+}
+
+static inline void pthread_cond_signal(pthread_cond_t *cond)
+{
+WakeConditionVariable(cond);
+}
+
+#else // _WIN32_WINNT  0x0600
 /* for pre-Windows 6.0 platforms we need to define and use our own condition
  * variable and api */
 typedef struct  win32_cond_t {
@@ -149,6 +157,13 @@ typedef struct  win32_cond_t {
 volatile int is_broadcast;
 } win32_cond_t;
 
+/* function pointers to conditional variable API on windows 6.0+ kernels */
+static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
+static void (WINAPI *cond_init)(pthread_cond_t *cond);
+static void (WINAPI *cond_signal)(pthread_cond_t *cond);
+static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
+DWORD milliseconds);
+
 static av_unused void pthread_cond_init(pthread_cond_t *cond, const void 
*unused_attr)
 {
 win32_cond_t *win32_cond = NULL;
@@ -276,6 +291,7 @@ static av_unused void pthread_cond_signal(pthread_cond_t 
*cond)
 
 pthread_mutex_unlock(win32_cond-mtx_broadcast);
 }
+#endif
 
 static av_unused void w32thread_init(void)
 {
-- 
2.0.4


___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] compat/w32pthreads: use the CONDITION_VARIABLE typedef if available

2014-10-09 Thread James Almer

This silences warnings about passing arguments from incompatible pointer type
when targeting Windows Vista or newer.

Signed-off-by: James Almer jamr...@gmail.com
---
 compat/w32pthreads.h | 23 +--
 configure|  2 ++
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index d0b90e8..f8eb0c8 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -54,12 +54,15 @@ typedef struct pthread_t {
  * not mutexes */
 typedef CRITICAL_SECTION pthread_mutex_t;
 
-/* This is the CONDITIONAL_VARIABLE typedef for using Window's native
- * conditional variables on kernels 6.0+.
- * MinGW does not currently have this typedef. */
+/* This is the CONDITION_VARIABLE typedef for using Windows' native
+ * conditional variables on kernels 6.0+. */
+#if HAVE_CONDITION_VARIABLE_PTR
+typedef CONDITION_VARIABLE pthread_cond_t;
+#else
 typedef struct pthread_cond_t {
-void *ptr;
+void *Ptr;
 } pthread_cond_t;
+#endif
 
 /* function pointers to conditional variable API on windows 6.0+ kernels */
 #if _WIN32_WINNT  0x0600
@@ -158,7 +161,7 @@ static av_unused void pthread_cond_init(pthread_cond_t 
*cond, const void *unused
 win32_cond = av_mallocz(sizeof(win32_cond_t));
 if (!win32_cond)
 return;
-cond-ptr = win32_cond;
+cond-Ptr = win32_cond;
 win32_cond-semaphore = CreateSemaphore(NULL, 0, 0x7fff, NULL);
 if (!win32_cond-semaphore)
 return;
@@ -172,7 +175,7 @@ static av_unused void pthread_cond_init(pthread_cond_t 
*cond, const void *unused
 
 static av_unused void pthread_cond_destroy(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 /* native condition variables do not destroy */
 if (cond_init)
 return;
@@ -183,12 +186,12 @@ static av_unused void pthread_cond_destroy(pthread_cond_t 
*cond)
 pthread_mutex_destroy(win32_cond-mtx_waiter_count);
 pthread_mutex_destroy(win32_cond-mtx_broadcast);
 av_freep(win32_cond);
-cond-ptr = NULL;
+cond-Ptr = NULL;
 }
 
 static av_unused void pthread_cond_broadcast(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int have_waiter;
 
 if (cond_broadcast) {
@@ -219,7 +222,7 @@ static av_unused void pthread_cond_broadcast(pthread_cond_t 
*cond)
 
 static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int last_waiter;
 if (cond_wait) {
 cond_wait(cond, mutex, INFINITE);
@@ -251,7 +254,7 @@ static av_unused int pthread_cond_wait(pthread_cond_t 
*cond, pthread_mutex_t *mu
 
 static av_unused void pthread_cond_signal(pthread_cond_t *cond)
 {
-win32_cond_t *win32_cond = cond-ptr;
+win32_cond_t *win32_cond = cond-Ptr;
 int have_waiter;
 if (cond_signal) {
 cond_signal(cond);
diff --git a/configure b/configure
index d87871e..a82bef7 100755
--- a/configure
+++ b/configure
@@ -1498,6 +1498,7 @@ TOOLCHAIN_FEATURES=
 
 
 TYPES_LIST=
+CONDITION_VARIABLE_Ptr
 socklen_t
 struct_addrinfo
 struct_group_source_req
@@ -4088,6 +4089,7 @@ check_func_headers windows.h MapViewOfFile
 check_func_headers windows.h SetConsoleTextAttribute
 check_func_headers windows.h Sleep
 check_func_headers windows.h VirtualAlloc
+check_struct windows.h CONDITION_VARIABLE Ptr
 
 check_header direct.h
 check_header dlfcn.h
-- 
2.0.4


___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 3/4] lavc: add a public API for parsing vorbis packets.

2014-11-03 Thread James Almer

On 29/10/14 12:46 PM, Anton Khirnov wrote:
 It is required by (at least) the ogg demuxer.
 
 Mark the current semi-public apriv API for removal.
 ---
  doc/APIchanges |  3 +++
  libavcodec/Makefile| 12 ++
  libavcodec/version.h   |  4 ++--
  libavcodec/vorbis_parse.h  | 58 
 ++
  libavcodec/vorbis_parser.c | 52 -
  libavcodec/vorbis_parser.h |  7 --
  6 files changed, 121 insertions(+), 15 deletions(-)
  create mode 100644 libavcodec/vorbis_parse.h

Maybe call the internal header vorbis_parser_internal.h, and the public one 
vorbis_parser.h?
That's how it was done for dv_profile, and is less confusing.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] vf_interlace: x86: improve asm performance

2014-11-24 Thread James Almer

On 24/11/14 8:05 PM, Vittorio Giovara wrote:
 On Mon, Nov 24, 2014 at 5:58 PM, Henrik Gramner hen...@gramner.com wrote:
 +mova m2, [r2+r1]
 +mova m3, [r2+r1+mmsize]
 +pxor m2, m6
 +pxor m3, m6
 pxor m2, m6, [r2+r1]
 pxor m3, m6, [r2+r1+mmsize]

 Avoids two moves in AVX, otherwise LGTM.
 
 queued, thanks for the suggestion

Looking at the committed code, you followed Henrik's suggestion about the pxor 
lines, but you didn't remove the mova lines, which are now redundant.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] Fwd: Bug#771126: libav: contains non-DFSG image file tests/lena.pnm

2014-11-27 Thread James Almer

On 27/11/14 2:43 PM, Reinhard Tartler wrote:
 Hi,
 
 it seems that tests/lena.pnm is not really redistributable. I'm
 proposing to replace it with an image I've taken this summer. The
 patch itself is too large to post it here, which is why I've uploaded
 it to https://github.com/libav/libav/pull/17.
 
 Luca seems okay with it, koda suggested to ask here anyways.
 
 OK to push to master? any suggestions for the commit message?
 
 Reinhard

Isn't it cleaner to just upload lena to the FATE suite instead of having to
update the reference files for all the relevant tests?
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] Fwd: Bug#771126: libav: contains non-DFSG image file tests/lena.pnm

2014-11-27 Thread James Almer

On 27/11/14 3:03 PM, Reinhard Tartler wrote:
 On Thu, Nov 27, 2014 at 12:59 PM, James Almer jamr...@gmail.com wrote:
 On 27/11/14 2:43 PM, Reinhard Tartler wrote:
 Hi,

 it seems that tests/lena.pnm is not really redistributable. I'm
 proposing to replace it with an image I've taken this summer. The
 patch itself is too large to post it here, which is why I've uploaded
 it to https://github.com/libav/libav/pull/17.

 Luca seems okay with it, koda suggested to ask here anyways.

 OK to push to master? any suggestions for the commit message?

 Reinhard

 Isn't it cleaner to just upload lena to the fate suit instead of having to
 update the reference files for all the relevant tests?
 
 How is that cleaner than replacing non-redistributable files with a
 perfectly free one?

Cleaner commit. It would be a matter of changing the path of lena.pnm to the 
samples 
directory. A couple lines change.

 
 Moving it to fate ensures that no tests can be executed without fate
 (currently, there are some).

vsynth and asynth dependent tests would still run without the FATE suite.

Anyway, just my 2 cents.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] oggdec: add support for VP8 demuxing

2014-12-13 Thread James Almer

On 12/12/14 5:57 PM, Vittorio Giovara wrote:
 From: James Almer jamr...@gmail.com
 
 Signed-off-by: James Almer jamr...@gmail.com
 Signed-off-by: Michael Niedermayer michae...@gmx.at
 Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com
 ---
  Changelog |   1 +
  libavformat/Makefile  |   1 +
  libavformat/oggdec.c  |   1 +
  libavformat/oggdec.h  |   1 +
  libavformat/oggparsevp8.c | 142 
 ++
  libavformat/version.h |   4 +-
  6 files changed, 148 insertions(+), 2 deletions(-)
  create mode 100644 libavformat/oggparsevp8.c
 

[...]

 diff --git a/libavformat/oggparsevp8.c b/libavformat/oggparsevp8.c
 new file mode 100644
 index 000..1256bfe
 --- /dev/null
 +++ b/libavformat/oggparsevp8.c
 @@ -0,0 +1,142 @@
 +/*
 + * On2 VP8 parser for Ogg
 + * Copyright (C) 2013 James Almer
 + *
 + * This file is part of Libav.
 + *
 + * Libav is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * Libav is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with Libav; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
 USA
 + */
 +
 +#include libavutil/intreadwrite.h
 +
 +#include avformat.h
 +#include internal.h
 +#include oggdec.h
 +
 +#define VP8_HEADER_SIZE 26
 +
 +static int vp8_header(AVFormatContext *s, int idx)
 +{
 +struct ogg *ogg = s-priv_data;
 +struct ogg_stream *os = ogg-streams + idx;
 +uint8_t *p = os-buf + os-pstart;
 +AVStream *st = s-streams[idx];
 +AVRational framerate;
 +
 +if (os-psize  7 || p[0] != 0x4f)
 +return 0;
 +
 +switch (p[5]){
 +case 0x01:
 +if (os-psize  VP8_HEADER_SIZE) {
 +av_log(s, AV_LOG_ERROR, Invalid OggVP8 header packet);
 +return AVERROR_INVALIDDATA;
 +}
 +
 +if (p[6] != 1) {
 +av_log(s, AV_LOG_WARNING,
 +   Unknown OggVP8 version %d.%d\n, p[6], p[7]);
 +return AVERROR_INVALIDDATA;
 +}
 +
 +st-codec-width= AV_RB16(p +  8);
 +st-codec-height   = AV_RB16(p + 10);
 +st-sample_aspect_ratio.num = AV_RB24(p + 12);
 +st-sample_aspect_ratio.den = AV_RB24(p + 15);
 +framerate.den   = AV_RB32(p + 18);
 +framerate.num   = AV_RB32(p + 22);
 +
 +avpriv_set_pts_info(st, 64, framerate.num, framerate.den);
 +st-codec-codec_type = AVMEDIA_TYPE_VIDEO;
 +st-codec-codec_id   = AV_CODEC_ID_VP8;
 +st-need_parsing  = AVSTREAM_PARSE_HEADERS;
 +break;
 +case 0x02:
 +if (p[6] != 0x20)
 +return AVERROR_INVALIDDATA;
 +ff_vorbis_comment(s, st-metadata, p + 7, os-psize - 7, 1);

ff_vorbis_stream_comment() for consistency with the other parsers.

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] oggdec: add support for VP8 demuxing

2014-12-16 Thread James Almer

On 16/12/14 11:58 AM, Vittorio Giovara wrote:
 From: James Almer jamr...@gmail.com
 
 Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com
 ---
 Dropped the sign-offs since the file was modified.
 Addressed Anton's and James' comment.
 Vittorio
 
  Changelog |   1 +
  libavformat/Makefile  |   1 +
  libavformat/oggdec.c  |   1 +
  libavformat/oggdec.h  |   1 +
  libavformat/oggparsevp8.c | 142 
 ++
  libavformat/version.h |   4 +-
  6 files changed, 148 insertions(+), 2 deletions(-)
  create mode 100644 libavformat/oggparsevp8.c

Please undo the change Anton requested. It was correct in the first patch.
Check the samples from http://people.freedesktop.org/~slomo/ogg-vp8/ and see 
the framerate it reports for them.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] oggdec: add support for VP8 demuxing

2014-12-16 Thread James Almer

On 16/12/14 3:05 PM, Vittorio Giovara wrote:
 On Tue, Dec 16, 2014 at 6:10 PM, James Almer jamr...@gmail.com wrote:
 On 16/12/14 11:58 AM, Vittorio Giovara wrote:
 From: James Almer jamr...@gmail.com

 Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com
 ---
 Dropped the sign-offs since the file was modified.
 Addressed Anton's and James' comment.
 Vittorio

  Changelog |   1 +
  libavformat/Makefile  |   1 +
  libavformat/oggdec.c  |   1 +
  libavformat/oggdec.h  |   1 +
  libavformat/oggparsevp8.c | 142 
 ++
  libavformat/version.h |   4 +-
  6 files changed, 148 insertions(+), 2 deletions(-)
  create mode 100644 libavformat/oggparsevp8.c

 Please undo the change Anton requested. It was correct in the first patch.
 Check the samples from http://people.freedesktop.org/~slomo/ogg-vp8/ and see
 the framerate it reports for them.
 
 Thanks for the link. According to the specifications hosted there
 http://people.freedesktop.org/~slomo/ogg-vp8/ogg-vp8.pdf it looks like
 numerator and denominator are parsed wrong.
 
 your code
 st-codec-width= AV_RB16(p +  8);
 st-codec-height   = AV_RB16(p + 10);
 st-sample_aspect_ratio.num = AV_RB24(p + 12);
 st-sample_aspect_ratio.den = AV_RB24(p + 15);
 framerate.den   = AV_RB32(p + 18);
 framerate.num   = AV_RB32(p + 22);
 
 spec code
 FW 16 Stored frame width.
 FH 16 Stored frame height.
 PARN 24 Pixel aspect ratio numerator.
 PARD 24 Pixel aspect ratio denominator.
 FPSN 32 Frame rate numerator.
 FPSD 32 Frame rate denominator
 
 So it looks like the change Anton requested was correct and the
 AV_RB32 need to be inverted, unless I am missing something. James, can
 you confirm for me please?

Yes, that should work as well.

 
 On an unrelated note, would it be possible to have a fate test?
 Thanks

Sure, I'll send one after this is committed.

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/3] dca: Support for XLL (lossless extension)

2015-03-16 Thread James Almer

On 16/03/15 5:00 AM, Niels Möller wrote:
 James Almer jamr...@gmail.com writes:
 
 Valgrind is complaining about this code (Conditional jump or move
 depends on uninitialised
 value error), as seen here
 https://fate.libav.org/x86_64-linux-gcc-valgrind/20150316044429

 Zero initializing the param_state[16] struct from
 ff_dca_xll_decode_audio() with { { 0 } }
 fixes it, but it's possible it may instead be hiding the real bug in the 
 code.
 
 If I read the code correctly, it looks like params-pancABIT0 is read
 from the stream for the first segment (seg == 0) only, and used for
 decoding params-nSamplePart0 samples. And that the latter value ought
 to be always zero when seg != 0.
 
 The logic is a bit complex, and since it many months since I wrote that
 code, I don't quite remember how it is supposed to work... But I suspect
 the problem is that the value, which is a loop invariant, is read and
 tested up-front, even in the case that the loop using it runs for zero
 iterations.
 
 Can you test if the below patch solves the problem? It reads
 params-pancABIT0 only when it's going to be used.
 
 Regards,
 /Niels
 
 diff --git a/libavcodec/dca_xll.c b/libavcodec/dca_xll.c
 index 0c32d6e..5a558b8 100644
 --- a/libavcodec/dca_xll.c
 +++ b/libavcodec/dca_xll.c
 @@ -514,8 +514,8 @@ int ff_dca_xll_decode_audio(DCAContext *s, AVFrame *frame)
  }
  for (i = 0; i  chset-channels; i++) {
  int param_index = params-seg_type ? 0 : i;
 -int bits= params-pancABIT0[param_index];
  int part0   = params-nSamplPart0[param_index];
 +int bits= part0 ? params-pancABIT0[param_index] : 0;
  int *sample_buf = s-xll_sample_buf +
(in_channel + i) * s-xll_smpl_in_seg;

Yes, it fixes it on my end.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/3] dca: Support for XLL (lossless extension)

2015-03-15 Thread James Almer

On 13/03/15 12:24 PM, Luca Barbato wrote:
 On 13/03/15 16:17, Diego Biurrun wrote:
 From: Niels Möller ni...@lysator.liu.se

 ---

 Changes since last round:

 - XLL disabled by default.
 - Return error on too many downmix coefficients

 This has survived Oracle, so it's good to go IMO and will hit the
 tree very soon, barring last minute comments/objections.

 
 Fine for me.
 
 lu

Valgrind is complaining about this code ("Conditional jump or move depends on
uninitialised value" error), as seen here
https://fate.libav.org/x86_64-linux-gcc-valgrind/20150316044429

Zero initializing the param_state[16] struct from ff_dca_xll_decode_audio() 
with { { 0 } } 
fixes it, but it's possible it may instead be hiding the real bug in the code.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] atestsrc: Initial implementation

2015-03-12 Thread James Almer

On 12/03/15 1:14 PM, Luca Barbato wrote:
 On 12/03/15 13:54, Derek Buitenhuis wrote:
 On 3/11/2015 7:07 PM, Luca Barbato wrote:
 On top of it, I wasn't aware it exists

 The feature has been complete and in FFmpeg master for 1.5+ years...

 I do not think pretend FFmpeg doesn't exist and never look at it
 at all LALALAL I CAN'T HEAR YOU is a valid stance to take as a
 competitive library. The only ones who suffer are users.
 
 Sure would be nice to add more features that are present in FFmpeg, but looks 
 like you and Hendrik missed the purpose of this patch.
 
 I want to write a walk through in the form of blogposts, so I need to write 
 something quite simple and possibly improve it incrementally.
 
 I did already for the demuxer (that maybe could enjoy a second post with the 
 feedback martin gave) that I know better.
 
 Since I do not know so well avfilter, I wanted to see if even this minimal 
 audio source is right before blogging (for the demuxer I blogged and then 
 sent the code to the ml).
 
 lu

Either write it, blog about it and do not apply the end result to the tree, or
write a different simple filter instead. There are surely many filter ideas not
yet written in either of the two projects that one could write from scratch.
"I'm doing it because I want to write a blog post about how to write a filter"
is not a good reason to create and apply a second implementation that
inconveniences the end user.

Or alternatively you could, after finishing writing this filter and the blog 
post, port 
aevalsrc and cherry pick code from your filter to improve the former.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] tiff: Return more meaningful error codes

2015-03-29 Thread James Almer

On 28/03/15 2:52 PM, Justin Ruggles wrote:
 On 03/28/2015 01:42 PM, Himangi Saraogi wrote:
 ---
   libavcodec/tiffenc.c | 11 ++-
   1 file changed, 6 insertions(+), 5 deletions(-)

 diff --git a/libavcodec/tiffenc.c b/libavcodec/tiffenc.c
 index 169360f..46e4207 100644
 --- a/libavcodec/tiffenc.c
 +++ b/libavcodec/tiffenc.c
 @@ -153,7 +153,8 @@ static int add_entry1(TiffEncoderContext *s,
* @param dst Output buffer
* @param n Size of input buffer
* @param compr Compression method
 - * @return Number of output bytes. If an output error is encountered, -1 
 returned
 + * @return Number of output bytes. If an output error is encountered, a 
 negative
 + * value corresponding to an AVERROR error code is returned.
*/
   static int encode_strip(TiffEncoderContext *s, const int8_t *src,
   uint8_t *dst, int n, int compr)
 @@ -166,14 +167,14 @@ static int encode_strip(TiffEncoderContext *s, const 
 int8_t *src,
   unsigned long zlen = s-buf_size - (*s-buf - s-buf_start);
   if (compress(dst, zlen, src, n) != Z_OK) {
   av_log(s-avctx, AV_LOG_ERROR, Compressing failed\n);
 -return -1;
 +return AVERROR_INVALIDDATA;
 
 This is an unknown error from an external library, so AVERROR_UNKNOWN should 
 be returned.
 
   }
   return zlen;
   }
   #endif
   case TIFF_RAW:
   if (check_size(s, n))
 -return -1;
 +return AVERROR(EINVAL);
   memcpy(dst, src, n);
   return n;
   case TIFF_PACKBITS:
 @@ -182,7 +183,7 @@ static int encode_strip(TiffEncoderContext *s, const 
 int8_t *src,
   case TIFF_LZW:
   return ff_lzw_encode(s-lzws, src, n);
   default:
 -return -1;
 +return AVERROR_UNKNOWN;
 
 Should be AVERROR_BUG since compression type is an AVOption that has defined 
 bounds.

No, this should be AVERROR(EINVAL) because even inside the bounds there are
several values for compression that are not currently supported.
I can do "avconv -i INPUT -compression_algo 2 OUTPUT" and it wouldn't be a bug,
it would be an invalid argument.

 
   }
   }

 @@ -291,7 +292,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket 
 *pkt,
   default:
   av_log(s-avctx, AV_LOG_ERROR,
  This colors format is not supported\n);
 -return -1;
 +return AVERROR_INVALIDDATA;
 
 This really never should happen in practice, but at any rate the correct 
 error value is AVERROR(EINVAL) because it is an unsupported/invalid field set 
 by the user.
 
   }

   if (s-compr == TIFF_DEFLATE   ||

 
 
 Thanks,
 Justin
 
 ___
 libav-devel mailing list
 libav-devel@libav.org
 https://lists.libav.org/mailman/listinfo/libav-devel

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] Canopus HQ/HQA decoder

2015-03-24 Thread James Almer

On 22/03/15 12:49 PM, Vittorio Giovara wrote:
 +// AAN IDCT

If this isn't already in the tree somewhere and it's generic enough that it can 
be reused, 
then it should be shared like faanidct and added to idctdsp.
And if it's HQ/HQA specific, it still could be split into a new hqdsp context 
for potential 
optimizations.

 +
 +#define FIX_1_082 17734
 +#define FIX_1_847 30274
 +#define FIX_1_414 23170
 +#define FIX_2_613 21407 // divided by two to fit the range
 +
 +#define IDCTMUL(a, b) ((a) * (b)  16)
 +
 +static inline void idct_row(int16_t *blk)
 +{
 +int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmpA;
 +int tmpB, tmpC, tmpD, tmpE, tmpF, tmp10, tmp11, tmp12, tmp13, tmp14;
 +
 +tmp0 = blk[5] - blk[3];
 +tmp1 = blk[5] + blk[3];
 +tmp2 = blk[1] - blk[7];
 +tmp3 = blk[1] + blk[7];
 +tmp4 = tmp3 - tmp1;
 +tmp5 = IDCTMUL(tmp0 + tmp2, FIX_1_847);
 +tmp6 = IDCTMUL(tmp2,FIX_1_082) - tmp5;
 +tmp7 = tmp5 - IDCTMUL(tmp0, FIX_2_613) * 2;
 +tmp8 = tmp3 + tmp1;
 +tmp9 = tmp7 * 4 - tmp8;
 +tmpA = IDCTMUL(tmp4, FIX_1_414) * 4 - tmp9;
 +tmpB = tmp6 * 4 + tmpA;
 +tmpC = blk[2] + blk[6];
 +tmpD = blk[2] - blk[6];
 +tmpE = blk[0] - blk[4];
 +tmpF = blk[0] + blk[4];
 +
 +tmp10 = IDCTMUL(tmpD, FIX_1_414) * 4 - tmpC;
 +tmp11 = tmpE - tmp10;
 +tmp12 = tmpF - tmpC;
 +tmp13 = tmpE + tmp10;
 +tmp14 = tmpF + tmpC;
 +
 +blk[0] = tmp14 + tmp8;
 +blk[1] = tmp13 + tmp9;
 +blk[2] = tmp11 + tmpA;
 +blk[3] = tmp12 - tmpB;
 +blk[4] = tmp12 + tmpB;
 +blk[5] = tmp11 - tmpA;
 +blk[6] = tmp13 - tmp9;
 +blk[7] = tmp14 - tmp8;
 +}
 +
 +static inline void idct_col(int16_t *blk)
 +{
 +int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmpA;
 +int tmpB, tmpC, tmpD, tmpE, tmpF, tmp10, tmp11, tmp12, tmp13, tmp14;
 +
 +tmp0 = blk[5 * 8] - blk[3 * 8];
 +tmp1 = blk[5 * 8] + blk[3 * 8];
 +tmp2 = blk[1 * 8] * 2 - (blk[7 * 8]  2);
 +tmp3 = blk[1 * 8] * 2 + (blk[7 * 8]  2);
 +tmp4 = tmp3 - tmp1;
 +tmp5 = IDCTMUL(tmp0 + tmp2, FIX_1_847);
 +tmp6 = IDCTMUL(tmp2,FIX_1_082) - tmp5;
 +tmp7 = tmp5 - IDCTMUL(tmp0, FIX_2_613) * 2;
 +tmp8 = (tmp3 + tmp1)  1;
 +tmp9 = tmp7 * 2 - tmp8;
 +tmpA = IDCTMUL(tmp4, FIX_1_414) * 2 - tmp9;
 +tmpB = tmp6 * 2 + tmpA;
 +tmpC =  blk[2 * 8] + (blk[6 * 8]  1)  1;
 +tmpD =  blk[2 * 8] - (blk[6 * 8]  1);
 +tmpE = (blk[0 * 8]  1) - (blk[4 * 8]  1) + 0x2020;
 +tmpF = (blk[0 * 8]  1) + (blk[4 * 8]  1) + 0x2020;
 +
 +tmp10 = IDCTMUL(tmpD, FIX_1_414) * 2 - tmpC;
 +tmp11 = tmpE - tmp10;
 +tmp12 = tmpF - tmpC;
 +tmp13 = tmpE + tmp10;
 +tmp14 = tmpF + tmpC;
 +
 +blk[0 * 8] = (tmp14 + tmp8)  6;
 +blk[1 * 8] = (tmp13 + tmp9)  6;
 +blk[2 * 8] = (tmp11 + tmpA)  6;
 +blk[3 * 8] = (tmp12 - tmpB)  6;
 +blk[4 * 8] = (tmp12 + tmpB)  6;
 +blk[5 * 8] = (tmp11 - tmpA)  6;
 +blk[6 * 8] = (tmp13 - tmp9)  6;
 +blk[7 * 8] = (tmp14 - tmp8)  6;
 +}
 +
 +static void hq_idct_put(uint8_t *dst, int stride, int16_t *block)
 +{
 +int i, j;
 +
 +for (i = 0; i  8; i++)
 +idct_row(block + i * 8);
 +for (i = 0; i  8; i++)
 +idct_col(block + i);
 +
 +// or use IDCTDSPContext.put_pixels_clamped()

Bench and see if it's worth using? There's an optimized version for most 
platforms after 
all.

 +for (i = 0; i  8; i++) {
 +for (j = 0; j  8; j++)
 +dst[j] = av_clip_uint8(block[j + i * 8]);
 +dst += stride;
 +}
 +}
 +
 +static inline void put_blocks(HQContext *c, AVFrame *pic,
 +  int plane, int x, int y, int ilace,
 +  int16_t *block0, int16_t *block1)
 +{
 +uint8_t *p = pic-data[plane] + x;
 +
 +hq_idct_put(p + y * pic-linesize[plane],
 +pic-linesize[plane]  ilace, block0);
 +hq_idct_put(p + (y + (ilace ? 1 : 8)) * pic-linesize[plane],
 +pic-linesize[plane]  ilace, block1);
 +}

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] avcodec: add libdcadec decoder

2015-03-23 Thread James Almer

On 23/03/15 10:23 AM, Luca Barbato wrote:
 On 23/03/15 12:45, Hendrik Leppkes wrote:
 ---
  configure  |   4 +
  libavcodec/Makefile|   1 +
  libavcodec/allcodecs.c |   1 +
  libavcodec/libdcadec.c | 197 
 +
  4 files changed, 203 insertions(+)
  create mode 100644 libavcodec/libdcadec.c

 diff --git a/configure b/configure
 index 3c38a8c..6eaac29 100755
 --- a/configure
 +++ b/configure
 @@ -184,6 +184,7 @@ External library support:
--enable-libcdio enable audio CD grabbing with libcdio
--enable-libdc1394   enable IIDC-1394 grabbing using libdc1394
 and libraw1394 [no]
 +  --enable-libdcadec   enable DCA decoding via libdcadec [no]
--enable-libfaac enable AAC encoding via libfaac [no]
--enable-libfdk-aac  enable AAC de/encoding via libfdk-aac [no]
--enable-libfreetype enable libfreetype [no]
 @@ -1149,6 +1150,7 @@ EXTERNAL_LIBRARY_LIST=
  libbs2b
  libcdio
  libdc1394
 +libdcadec
  libfaac
  libfdk_aac
  libfontconfig
 @@ -2004,6 +2006,7 @@ mpeg4video_parser_select=error_resilience h263dsp 
 mpeg_er mpegvideo qpeldsp
  vc1_parser_select=mpegvideo startcode vc1_decoder
  
  # external libraries
 +libdcadec_decoder_deps=libdcadec
  libfaac_encoder_deps=libfaac
  libfaac_encoder_select=audio_frame_queue
  libfdk_aac_decoder_deps=libfdk_aac
 @@ -4206,6 +4209,7 @@ enabled avisynth   { { check_header 
 avisynth/avisynth_c.h  check_l
  enabled frei0r { check_header frei0r.h || die ERROR: 
 frei0r.h header not found; }
  enabled gnutls require_pkg_config gnutls gnutls/gnutls.h 
 gnutls_global_init
  enabled libbs2brequire_pkg_config libbs2b bs2b.h bs2b_open
 +enabled libdcadec  require libdcadec libdcadec/dca_context.h 
 dcadec_context_create -ldcadec
 
 I'll get libdcadec a pkgconf file =p

It has one already. 
https://github.com/foo86/dcadec/commit/1ddd3b5547c33b36093c0786632c1287714252c6

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 01/14] Move AVAudioServiceType enum from lavc to lavu

2015-05-02 Thread James Almer

On 02/05/15 5:22 AM, Luca Barbato wrote:
 On 02/05/15 06:23, Anton Khirnov wrote:
 Quoting Vittorio Giovara (2015-05-02 01:17:08)
 The enum is used by lavc, lavf and lavfi, and it is referenced by lavu,
 so it sementically belongs to lavu more than any other.

 This change allows to drop an avcodec.h inclusion from avfilter.h.


 I would disagree here, since this logic would apply to any side data
 struct whatsoever. And I don't think they should all be in lavu.
 
 libav(meta)data ? =)
 
 Might be nice split libavutil a little so:
 
 libavu - mem, basic data types, compat, version machinery
 
 libavdata - packet, frame, samples and pixels

Sounds like libavcore.

 
 libavcomp - compressors
 
 libavhash - hashes

Might as well just drop all these modules and make libgcrypt a mandatory 
dependency if
it comes to this...
I don't think anyone links to lavu exclusively for the crypto modules. A 
library like
this would exist only to be linked against lavc/lavf.

 
 This is something I'd like to have soon if nobody is strongly against it.
 
 lu
 ___
 libav-devel mailing list
 libav-devel@libav.org
 https://lists.libav.org/mailman/listinfo/libav-devel
 

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] dashenc: replace attribute id with contentType for the AdaptationSet element

2015-05-10 Thread James Almer

id should be an integer, not a string. It is also optional, so use
contentType instead which is the proper attribute for these values.

This fixes an MPD validation error.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavformat/dashenc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index fc5c823..f228b86 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -503,7 +503,7 @@ static int write_manifest(AVFormatContext *s, int final)
 }
 
 if (c-has_video) {
-avio_printf(out, \t\tAdaptationSet id=\video\ 
segmentAlignment=\true\ bitstreamSwitching=\true\\n);
+avio_printf(out, \t\tAdaptationSet contentType=\video\ 
segmentAlignment=\true\ bitstreamSwitching=\true\\n);
 for (i = 0; i  s-nb_streams; i++) {
 AVStream *st = s-streams[i];
 OutputStream *os = c-streams[i];
@@ -516,7 +516,7 @@ static int write_manifest(AVFormatContext *s, int final)
 avio_printf(out, \t\t/AdaptationSet\n);
 }
 if (c-has_audio) {
-avio_printf(out, \t\tAdaptationSet id=\audio\ 
segmentAlignment=\true\ bitstreamSwitching=\true\\n);
+avio_printf(out, \t\tAdaptationSet contentType=\audio\ 
segmentAlignment=\true\ bitstreamSwitching=\true\\n);
 for (i = 0; i  s-nb_streams; i++) {
 AVStream *st = s-streams[i];
 OutputStream *os = c-streams[i];
-- 
2.4.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] avcodec/libx265: use x265 Multi-library Interface to query the API

2015-05-12 Thread James Almer

On 12/05/15 6:00 PM, Luca Barbato wrote:
 On 11/05/15 17:25, Derek Buitenhuis wrote:
 From: Gopu Govindaswamy g...@multicorewareinc.com
 
 
 The x265pic.bitDepth is set on encode_frame while I assume that this
 information should be used at init now.
 
 I'm not sure how recent is this api version, I hope it isn't necessary

The API as used and required by this patch is not available on any tagged 
release
right now. It will be in x265 1.7.

 to consider adding a fallback path even if it is easy with a bunch of
 defines since the signatures look the same beside x264 - ctx-api.
 
 I'll edit the subject and make it fit in case it does not tomorrow.
 
 Thanks for picking it up.
 
 lu
 ___
 libav-devel mailing list
 libav-devel@libav.org
 https://lists.libav.org/mailman/listinfo/libav-devel
 

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] avutil: remove pointless bmi1 define

2015-04-18 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 libavutil/cpu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 354d21e..4e8ef61 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -90,8 +90,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
 #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
 #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
-#define CPUFLAG_BMI1 (AV_CPU_FLAG_BMI1)
-#define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | CPUFLAG_BMI1)
+#define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | AV_CPU_FLAG_BMI1)
 static const AVOption cpuflags_opts[] = {
 { flags   , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, 
INT64_MAX, .unit = flags },
 #if   ARCH_PPC
@@ -113,7 +112,7 @@ int av_parse_cpu_flags(const char *s)
 { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3
 },.unit = flags },
 { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4
 },.unit = flags },
 { avx2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2
 },.unit = flags },
-{ bmi1, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI1
 },.unit = flags },
+{ bmi1, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI1
 },.unit = flags },
 { bmi2, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI2
 },.unit = flags },
 { 3dnow   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW   
 },.unit = flags },
 { 3dnowext, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT
 },.unit = flags },
-- 
2.3.5

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available

2015-05-19 Thread James Almer

On 19/05/15 7:49 AM, Vittorio Giovara wrote:
 ---
 Another set of eyes for the pixel format mapping would be welcome.
 Vittorio
 
  libavcodec/libvpx.c| 26 ++
  libavcodec/libvpx.h|  2 ++
  libavcodec/libvpxdec.c |  4 ++--
  3 files changed, 30 insertions(+), 2 deletions(-)
 
 diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c
 index 20f4484..5adad66 100644
 --- a/libavcodec/libvpx.c
 +++ b/libavcodec/libvpx.c
 @@ -33,3 +33,29 @@ int ff_vp9_check_experimental(AVCodecContext *avctx)
  }
  return 0;
  }
 +
 +enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img)
 +{
 +switch(img) {
 +case VPX_IMG_FMT_RGB24: return AV_PIX_FMT_RGB24;
 +case VPX_IMG_FMT_RGB565:return AV_PIX_FMT_RGB565BE;
 +case VPX_IMG_FMT_RGB555:return AV_PIX_FMT_RGB555BE;
 +case VPX_IMG_FMT_UYVY:  return AV_PIX_FMT_UYVY422;
 +case VPX_IMG_FMT_YUY2:  return AV_PIX_FMT_YUYV422;
 +case VPX_IMG_FMT_YVYU:  return AV_PIX_FMT_YVYU422;
 +case VPX_IMG_FMT_BGR24: return AV_PIX_FMT_BGR24;
 +case VPX_IMG_FMT_ARGB:  return AV_PIX_FMT_ARGB;
 +case VPX_IMG_FMT_ARGB_LE:   return AV_PIX_FMT_BGRA;
 +case VPX_IMG_FMT_RGB565_LE: return AV_PIX_FMT_RGB565LE;
 +case VPX_IMG_FMT_RGB555_LE: return AV_PIX_FMT_RGB555LE;
 +case VPX_IMG_FMT_I420:  return AV_PIX_FMT_YUV420P;

vp8 supports only this one. Every other pix_fmt is vp9 only and should be 
guarded by a
CONFIG_LIBVPX_VP9_DECODER preprocessor check.

 +case VPX_IMG_FMT_I422:  return AV_PIX_FMT_YUV422P;
 +case VPX_IMG_FMT_I444:  return AV_PIX_FMT_YUV444P;
 +case VPX_IMG_FMT_I440:  return AV_PIX_FMT_YUV440P;

This was added starting with libvpx 1.4.0. It will fail to compile with any 
prior version.
A quick preprocessor check to make sure this define is available is 
VPX_IMAGE_ABI_VERSION >= 3

 +case VPX_IMG_FMT_444A:  return AV_PIX_FMT_YUVA444P;
 +case VPX_IMG_FMT_I42016:return AV_PIX_FMT_YUV420P16BE;
 +case VPX_IMG_FMT_I42216:return AV_PIX_FMT_YUV422P16BE;
 +case VPX_IMG_FMT_I44416:return AV_PIX_FMT_YUV444P16BE;

Likewise, these three were added with libvpx 1.4.0. Checking for 
VPX_IMG_FMT_HIGHBITDEPTH should
suffice here, or alternatively, the same abi version check as above if git 
snapshots before 1.4.0
was tagged are not important.
And the value of img->bit_depth should probably be checked instead and these 
high bitdepth pix_fmts
set accordingly.

 +default:return AV_PIX_FMT_NONE;
 +}
 +}
 diff --git a/libavcodec/libvpx.h b/libavcodec/libvpx.h
 index cb1ed09..79a05f4 100644
 --- a/libavcodec/libvpx.h
 +++ b/libavcodec/libvpx.h
 @@ -25,4 +25,6 @@
  
  int ff_vp9_check_experimental(AVCodecContext *avctx);
  
 +enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img);
 +
  #endif /* AVCODEC_LIBVPX_H */
 diff --git a/libavcodec/libvpxdec.c b/libavcodec/libvpxdec.c
 index 6052207..a1f9c22 100644
 --- a/libavcodec/libvpxdec.c
 +++ b/libavcodec/libvpxdec.c
 @@ -56,7 +56,6 @@ static av_cold int vpx_init(AVCodecContext *avctx,
  return AVERROR(EINVAL);
  }
  
 -avctx-pix_fmt = AV_PIX_FMT_YUV420P;
  return 0;
  }
  
 @@ -82,7 +81,8 @@ static int vp8_decode(AVCodecContext *avctx,
  }
  
  if ((img = vpx_codec_get_frame(ctx-decoder, iter))) {
 -if (img-fmt != VPX_IMG_FMT_I420) {
 +avctx-pix_fmt = ff_vpx_imgfmt_to_pixfmt(img-fmt);
 +if (avctx-pix_fmt == AV_PIX_FMT_NONE) {
  av_log(avctx, AV_LOG_ERROR, Unsupported output colorspace 
 (%d)\n,
 img-fmt);
  return AVERROR_INVALIDDATA;
 

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] mpjpegdec: don't try to alloc an AVIOContext when probe is guaranteed to fail

2015-06-08 Thread James Almer

The first check is done without the AVIOContext, so alloc it only if said check 
succeeds

Signed-off-by: James Almer jamr...@gmail.com
---
 libavformat/mpjpegdec.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavformat/mpjpegdec.c b/libavformat/mpjpegdec.c
index 72891e7..e2a2ece 100644
--- a/libavformat/mpjpegdec.c
+++ b/libavformat/mpjpegdec.c
@@ -83,13 +83,13 @@ static int mpjpeg_read_probe(AVProbeData *p)
 char line[128] = { 0 };
 int ret = 0;
 
+if (p-buf_size  2 || p-buf[0] != '-' || p-buf[1] != '-')
+return 0;
+
 pb = avio_alloc_context(p-buf, p-buf_size, 0, NULL, NULL, NULL, NULL);
 if (!pb)
 return AVERROR(ENOMEM);
 
-if (p-buf_size  2 || p-buf[0] != '-' || p-buf[1] != '-')
-goto end;
-
 while (!pb-eof_reached) {
 ret = get_line(pb, line, sizeof(line));
 if (ret  0)
@@ -101,7 +101,7 @@ static int mpjpeg_read_probe(AVProbeData *p)
 break;
 }
 }
-end:
+
 av_free(pb);
 
 return ret;
-- 
2.4.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] Introduce a TextureDSP module

2015-06-05 Thread James Almer

On 02/06/15 8:09 AM, Vittorio Giovara wrote:
 +/* Alpha compression function */
 +static void compress_alpha(uint8_t *dst, ptrdiff_t stride, const uint8_t 
 *block)
 +{
 +int i, j;
 +int dist, bias, dist4, dist2, bits, mask;
 +int mn, mx;
 +
 +/* Find min/max color */
 +mn = mx = block[3];
 +for (j = 0; j  4; j++) {
 +for (i = 0; i  4; i++) {
 +int val = block[3 + i * 4 + j * stride];
 +if (val  mn)
 +mn = val;
 +else if (val  mx)
 +mx = val;
 +}
 +}
 +
 +AV_ZERO64(dst);

Documentation for AV_ZERO* says "Parameters for AV_COPY*, AV_SWAP*,
AV_ZERO* must be naturally aligned. They may be implemented using MMX,
so emms_c() must be called before using any float code afterwards."

Make sure fate passes on x86_32 (targeting anything above i686, which
is when AV_ZERO64 is implemented with MMX movq) as you're using float
code all around.
If it doesn't, then maybe you could bench to see if using AV_WN64 is
faster than AV_ZERO64 + emms_c().
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/2] libvpx: Support all pixel formats available in encoding and decoding

2015-06-11 Thread James Almer

On 11/06/15 11:56 AM, Luca Barbato wrote:
 @@ -321,8 +321,12 @@ static av_cold int vpx_init(AVCodecContext *avctx,
  /* 0-3: For non-zero values the encoder increasingly optimizes for 
 reduced
 complexity playback on low powered devices at the expense of encode
 quality. */
 -   if (avctx-profile != FF_PROFILE_UNKNOWN)
 -   enccfg.g_profile = avctx-profile;
 +if (avctx-profile != FF_PROFILE_UNKNOWN)
 +enccfg.g_profile = avctx-profile;
 +else if (avctx-pix_fmt == AV_PIX_FMT_YUV440P)

As i said before, profile 0 is 8bit yuv420p.

 +avctx-profile = enccfg.g_profile = FF_PROFILE_VP9_0;
 +else
 +avctx-profile = enccfg.g_profile = FF_PROFILE_VP9_1;
 
  enccfg.g_error_resilient = ctx-error_resilient;
 


___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] vpx: Support version 1.3.0

2015-06-11 Thread James Almer

On 11/06/15 11:56 AM, Luca Barbato wrote:
 ---
 
 I tied the supported formats to the ABI version.
 
  configure  | 12 ++--
  libavcodec/libvpx.c|  8 ++--
  libavcodec/libvpxenc.c |  6 +-
  3 files changed, 17 insertions(+), 9 deletions(-)
 
 diff --git a/configure b/configure
 index a416dc2..8cb53d2 100755
 --- a/configure
 +++ b/configure
 @@ -4312,19 +4312,19 @@ enabled libvo_amrwbenc require libvo_amrwbenc 
 vo-amrwbenc/enc_if.h E_IF_in
  enabled libvorbis  require libvorbis vorbis/vorbisenc.h 
 vorbis_info_init -lvorbisenc -lvorbis -logg
  enabled libvpx {
  enabled libvpx_vp8_decoder  {
 -require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver 
 -lvpx ||
 -die ERROR: libvpx encoder version must be =1.4.0;
 +require_pkg_config vpx = 1.3.0 vpx/vpx_decoder.h 
 vpx_codec_dec_init_ver ||
 +die ERROR: libvpx encoder version must be = 1.3.0;
  }
  enabled libvpx_vp8_encoder  {
 -require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver 
 -lvpx ||
 -die ERROR: libvpx encoder version must be =1.4.0;
 +require_pkg_config vpx = 1.3.0 vpx/vpx_encoder.h 
 vpx_codec_enc_init_ver ||
 +die ERROR: libvpx encoder version must be = 1.3.0;
  }
  enabled libvpx_vp9_decoder  {
 -require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver 
 -lvpx ||
 +require_pkg_config vpx = 1.3.0 vpx/vpx_decoder.h 
 vpx_codec_dec_init_ver ||
  disable libvpx_vp9_decoder;
  }
  enabled libvpx_vp9_encoder  {
 -require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver 
 -lvpx ||
 +require_pkg_config vpx = 1.3.0 vpx/vpx_encoder.h 
 vpx_codec_enc_init_ver ||
  disable libvpx_vp9_encoder;

Using require_pkg_config() makes configure abort if the check fails, so the 
disable() calls are dead code.
That's why use_pkg_config() and check_pkg_config exist.
Also, checking for the decoding/encoding header and the init function is 
apparently not enough. You need
to check for the decoding/encoding interfaces vpx_codec_vp[89]_[cd]x because 
libvpx can be built without
one or more of the four components.

enabled libvpx require_pkg_config vpx = 1.3.0 vpx/vpx_codec.h 
vpx_codec_version  {
enabled libvpx_vp8_decoder  { check_pkg_config vpx vpx/vpx_decoder.h 
vpx/vp8dx.h vpx_codec_vp8_dx ||
disable libvpx_vp8_decoder; }
enabled libvpx_vp8_encoder  { check_pkg_config vpx vpx/vpx_encoder.h 
vpx/vp8cx.h vpx_codec_vp8_cx ||
disable libvpx_vp8_encoder; }
enabled libvpx_vp9_decoder  { check_pkg_config vpx vpx/vpx_decoder.h 
vpx/vp8dx.h vpx_codec_vp9_dx ||
disable libvpx_vp9_decoder; }
enabled libvpx_vp9_encoder  { check_pkg_config vpx vpx/vpx_encoder.h 
vpx/vp8cx.h vpx_codec_vp9_cx ||
disable libvpx_vp9_encoder; } }

Updated from the version i posted in a previous email (Which was wrong as it 
only checked for the header
and init function).
This will first check for a recent libvpx, then for each component.

  }
  }
 diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c
 index 603ed13..230bc49 100644
 --- a/libavcodec/libvpx.c
 +++ b/libavcodec/libvpx.c
 @@ -39,11 +39,13 @@ enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t 
 img)
  case VPX_IMG_FMT_I420:  return AV_PIX_FMT_YUV420P;
  case VPX_IMG_FMT_I422:  return AV_PIX_FMT_YUV422P;
  case VPX_IMG_FMT_I444:  return AV_PIX_FMT_YUV444P;
 -case VPX_IMG_FMT_I440:  return AV_PIX_FMT_YUV440P;
  case VPX_IMG_FMT_444A:  return AV_PIX_FMT_YUVA444P;
 +#ifdef VPX_IMG_FMT_HIGHBITDEPTH
 +case VPX_IMG_FMT_I440:  return AV_PIX_FMT_YUV440P;

The correct guard for VPX_IMG_FMT_I440 is VPX_IMAGE_ABI_VERSION >= 3.
libvpx git snapshots post 1.3.0 and pre 1.4.0 may fail because they may define 
VPX_IMG_FMT_HIGHBITDEPTH
but not VPX_IMG_FMT_I440.

  case VPX_IMG_FMT_I42016:return AV_PIX_FMT_YUV420P16BE;
  case VPX_IMG_FMT_I42216:return AV_PIX_FMT_YUV422P16BE;
  case VPX_IMG_FMT_I44416:return AV_PIX_FMT_YUV444P16BE;
 +#endif
  default:return AV_PIX_FMT_NONE;
  }
  }
 @@ -65,11 +67,13 @@ vpx_img_fmt_t ff_vpx_pixfmt_to_imgfmt(enum AVPixelFormat 
 pix)
  case AV_PIX_FMT_YUV420P:  return VPX_IMG_FMT_I420;
  case AV_PIX_FMT_YUV422P:  return VPX_IMG_FMT_I422;
  case AV_PIX_FMT_YUV444P:  return VPX_IMG_FMT_I444;
 -case AV_PIX_FMT_YUV440P:  return VPX_IMG_FMT_I440;
  case AV_PIX_FMT_YUVA444P: return VPX_IMG_FMT_444A;
 +#ifdef VPX_IMG_FMT_HIGHBITDEPTH
 +case AV_PIX_FMT_YUV440P:  return VPX_IMG_FMT_I440;
  case AV_PIX_FMT_YUV420P16BE:  return VPX_IMG_FMT_I42016;
  case AV_PIX_FMT_YUV422P16BE:  return VPX_IMG_FMT_I42216;
  case AV_PIX_FMT_YUV444P16BE:  return VPX_IMG_FMT_I44416;

Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding

2015-06-03 Thread James Almer

On 03/06/15 11:29 AM, Luca Barbato wrote:
 On 31/05/15 17:01, Luca Barbato wrote:
 On 27/05/15 22:25, James Almer wrote:
 Yes, that plus a considerable amount of ifdeffery in the code. It
 will be ugly, but i also think it's worth keeping compatibility with
 at least 1.3.0


 1.3.0 explodes on 422p, which at least in theory it should support. (I'm
 testing all the possible encodings right now).

 
 1.3.0 seems to have been released when not ready; do we really want to
 support it?
 
 lu

It works with 420p content (vp8 and vp9). Support for 422p, 440p, 444p and high 
bit-depth
was officially added with 1.4.0.
1.3.0 should reject the latter stuff, but for some reason it doesn't and it 
encodes garbage.

With some ifdeffery and static init magic both the decoder and encoder can be 
limited to
420p for vpx 1.3.0. But then again, I guess vpx 1.3.0 is old enough by now that 
support for
it can be safely dropped.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] configure: don't enable tls protocols if network is disabled

2015-06-01 Thread James Almer

This was a regression introduced with d8ffb2055f0e0fcb5d025bab72eb19c2a886c125.

Signed-off-by: James Almer jamr...@gmail.com
---
 configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure b/configure
index cdc5a8d..a29cd38 100755
--- a/configure
+++ b/configure
@@ -2216,6 +2216,8 @@ srtp_protocol_select=rtp_protocol
 tcp_protocol_select=network
 tls_gnutls_protocol_deps=gnutls
 tls_openssl_protocol_deps=openssl !tls_gnutls_protocol
+tls_gnutls_protocol_select=tcp_protocol
+tls_openssl_protocol_select=tcp_protocol
 tls_protocol_deps_any=tls_gnutls_protocol tls_openssl_protocol
 tls_protocol_select=tcp_protocol
 udp_protocol_select=network
-- 
2.4.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding

2015-05-27 Thread James Almer

On 27/05/15 1:15 PM, Vittorio Giovara wrote:
 Bump the minimum libvpx version to 1.4.0 so that all pixel
 formats are present. Add new VP9 profiles.
 
 Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com
 ---
 Modified as requested.
 Vittorio
 
  configure  | 23 +++--
  libavcodec/avcodec.h   |  4 
  libavcodec/libvpx.c| 56 
 ++
  libavcodec/libvpx.h|  3 ++-
  libavcodec/libvpxdec.c |  7 ++-
  libavcodec/libvpxenc.c | 32 +
  libavcodec/version.h   |  2 +-
  7 files changed, 97 insertions(+), 30 deletions(-)
 
 diff --git a/configure b/configure
 index 863e33b..e26fc54 100755
 --- a/configure
 +++ b/configure
 @@ -4274,12 +4274,23 @@ enabled libvo_aacenc   require libvo_aacenc 
 vo-aacenc/voAAC.h voGetAACEncA
  enabled libvo_amrwbenc require libvo_amrwbenc vo-amrwbenc/enc_if.h 
 E_IF_init -lvo-amrwbenc
  enabled libvorbis  require libvorbis vorbis/vorbisenc.h 
 vorbis_info_init -lvorbisenc -lvorbis -logg
  enabled libvpx {
 -enabled libvpx_vp8_decoder  { check_lib2 vpx/vpx_decoder.h 
 vpx/vp8dx.h vpx_codec_dec_init_ver -lvpx ||
 -die ERROR: libvpx decoder version must 
 be =0.9.1; }
 -enabled libvpx_vp8_encoder  { check_lib2 vpx/vpx_encoder.h 
 vpx/vp8cx.h vpx_codec_enc_init_ver VPX_CQ -lvpx ||
 -die ERROR: libvpx encoder version must 
 be =0.9.6; }
 -enabled libvpx_vp9_decoder  { check_lib2 vpx/vpx_decoder.h 
 vpx/vp8dx.h vpx_codec_vp9_dx -lvpx || disable libvpx_vp9_decoder; }
 -enabled libvpx_vp9_encoder  { check_lib2 vpx/vpx_encoder.h 
 vpx/vp8cx.h vpx_codec_vp9_cx -lvpx || disable libvpx_vp9_encoder; } }
 +enabled libvpx_vp8_decoder  {
 +require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver 
 -lvpx ||
 +die ERROR: libvpx encoder version must be =1.4.0;

As Luca said, require_pkg_config. If you use require, "vpx >= 1.4.0" is just 
used as a name
to report a failure. It does not check for that version.
Also, all require functions terminate configure with an error if the check 
fails, so these
custom die calls are dead code. If you want to use your own custom error, use 
use_pkg_config
instead.

 +}
 +enabled libvpx_vp8_encoder  {
 +require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver 
 -lvpx ||
 +die ERROR: libvpx encoder version must be =1.4.0;
 +}
 +enabled libvpx_vp9_decoder  {
 +require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver 
 -lvpx ||
 +disable libvpx_vp9_decoder;
 +}
 +enabled libvpx_vp9_encoder  {
 +require vpx = 1.4.0 vpx/vpx_encoder.h vpx_codec_enc_init_ver 
 -lvpx ||
 +disable libvpx_vp9_encoder;
 +}
 +}
  enabled libwavpack require libwavpack wavpack/wavpack.h 
 WavpackOpenFileOutput  -lwavpack
  enabled libwebprequire_pkg_config libwebp webp/encode.h 
 WebPGetEncoderVersion
  enabled libx264require_pkg_config x264 stdint.h x264.h 
 x264_encoder_encode 
 diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
 index 3440126..16af20c 100644
 --- a/libavcodec/avcodec.h
 +++ b/libavcodec/avcodec.h
 @@ -2702,6 +2702,10 @@ typedef struct AVCodecContext {
  #define FF_PROFILE_JPEG2000_DCINEMA_2K  3
  #define FF_PROFILE_JPEG2000_DCINEMA_4K  4
  
 +#define FF_PROFILE_VP9_00
 +#define FF_PROFILE_VP9_11
 +#define FF_PROFILE_VP9_22
 +#define FF_PROFILE_VP9_33

Even if it's a simple change, credit where credit is due would be nice.

  
  #define FF_PROFILE_HEVC_MAIN1
  #define FF_PROFILE_HEVC_MAIN_10 2
 diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c
 index 20f4484..603ed13 100644
 --- a/libavcodec/libvpx.c
 +++ b/libavcodec/libvpx.c
 @@ -22,14 +22,54 @@
  
  #include libvpx.h
  
 -int ff_vp9_check_experimental(AVCodecContext *avctx)
 +enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img)
  {
 -if (avctx-strict_std_compliance  FF_COMPLIANCE_EXPERIMENTAL 
 -(vpx_codec_version_major()  1 ||
 - (vpx_codec_version_major() == 1  vpx_codec_version_minor()  3))) 
 {
 -av_log(avctx, AV_LOG_ERROR,
 -   Non-experimental support of VP9 requires libvpx = 1.3.0\n);
 -return AVERROR_EXPERIMENTAL;
 +switch (img) {
 +case VPX_IMG_FMT_RGB24: return AV_PIX_FMT_RGB24;
 +case VPX_IMG_FMT_RGB565:return AV_PIX_FMT_RGB565BE;
 +case VPX_IMG_FMT_RGB555:return AV_PIX_FMT_RGB555BE;
 +case VPX_IMG_FMT_UYVY:  return AV_PIX_FMT_UYVY422;
 +case VPX_IMG_FMT_YUY2:  return AV_PIX_FMT_YUYV422;
 +case VPX_IMG_FMT_YVYU:  return AV_PIX_FMT_YVYU422;
 +case VPX_IMG_FMT_BGR24: return AV_PIX_FMT_BGR24;
 +case VPX_IMG_FMT_ARGB:  return

Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding

2015-05-27 Thread James Almer

On 27/05/15 3:24 PM, James Almer wrote:
 On 27/05/15 1:15 PM, Vittorio Giovara wrote:
 Bump the minimum libvpx version to 1.4.0 so that all pixel
 formats are present. Add new VP9 profiles.

 Signed-off-by: Vittorio Giovara vittorio.giov...@gmail.com
 ---
 Modified as requested.
 Vittorio

  configure  | 23 +++--
  libavcodec/avcodec.h   |  4 
  libavcodec/libvpx.c| 56 
 ++
  libavcodec/libvpx.h|  3 ++-
  libavcodec/libvpxdec.c |  7 ++-
  libavcodec/libvpxenc.c | 32 +
  libavcodec/version.h   |  2 +-
  7 files changed, 97 insertions(+), 30 deletions(-)

 diff --git a/configure b/configure
 index 863e33b..e26fc54 100755
 --- a/configure
 +++ b/configure
 @@ -4274,12 +4274,23 @@ enabled libvo_aacenc   require libvo_aacenc 
 vo-aacenc/voAAC.h voGetAACEncA
  enabled libvo_amrwbenc require libvo_amrwbenc vo-amrwbenc/enc_if.h 
 E_IF_init -lvo-amrwbenc
  enabled libvorbis  require libvorbis vorbis/vorbisenc.h 
 vorbis_info_init -lvorbisenc -lvorbis -logg
  enabled libvpx {
 -enabled libvpx_vp8_decoder  { check_lib2 vpx/vpx_decoder.h 
 vpx/vp8dx.h vpx_codec_dec_init_ver -lvpx ||
 -die ERROR: libvpx decoder version must 
 be =0.9.1; }
 -enabled libvpx_vp8_encoder  { check_lib2 vpx/vpx_encoder.h 
 vpx/vp8cx.h vpx_codec_enc_init_ver VPX_CQ -lvpx ||
 -die ERROR: libvpx encoder version must 
 be =0.9.6; }
 -enabled libvpx_vp9_decoder  { check_lib2 vpx/vpx_decoder.h 
 vpx/vp8dx.h vpx_codec_vp9_dx -lvpx || disable libvpx_vp9_decoder; }
 -enabled libvpx_vp9_encoder  { check_lib2 vpx/vpx_encoder.h 
 vpx/vp8cx.h vpx_codec_vp9_cx -lvpx || disable libvpx_vp9_encoder; } }
 +enabled libvpx_vp8_decoder  {
 +require vpx = 1.4.0 vpx/vpx_decoder.h vpx_codec_dec_init_ver 
 -lvpx ||
 +die ERROR: libvpx encoder version must be =1.4.0;
 
 As Luca said, require_pkg_config. If you use require, "vpx >= 1.4.0" is just 
 used as a name
 to report a failure. It does not check for that version.
 Also, all require functions terminate configure with an error if the check 
 fails, so these
 custom die calls are dead code. If you want to use your own custom error, use 
 use_pkg_config
 instead.

Also, since 1.4.0 is the minimum required version now, you can simplify all 
this into

enabled libvpx require_pkg_config vpx = 1.4.0 vpx/vpx_codec.h 
vpx_codec_version  {
enabled_any libvpx_vp8_decoder libvpx_vp9_decoder  { check_pkg_config vpx 
vpx/vpx_decoder.h vpx/vp8dx.h vpx_codec_dec_init_ver ||
   disable 
libvpx_vp8_decoder libvpx_vp9_decoder; }
enabled_any libvpx_vp8_encoder libvpx_vp9_encoder  { check_pkg_config vpx 
vpx/vpx_encoder.h vpx/vp8cx.h vpx_codec_enc_init_ver ||
   disable 
libvpx_vp8_encoder libvpx_vp9_encoder; } }

Which will check for libvpx 1.4.0 first, then for the decoder and encoding 
headers depending
on enabled components.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding

2015-05-27 Thread James Almer

On 27/05/15 5:04 PM, Martin Storsjö wrote:
 On Wed, 27 May 2015, Vittorio Giovara wrote:
 
 Bump the minimum libvpx version to 1.4.0 so that all pixel
 formats are present. Add new VP9 profiles.
 
 Sorry to be a bit late to the party, but how bad would it be to keep compat 
 with older versions? Was there any other argument for dropping older versions 
 than because we can, and x265 did it? Allowing people to build with the 
 earlier versions with the reduced (old/existing) featureset is something that 
 I'd appreciate. I think x265 might have been a bit special case since that 
 involved a bigger API change than this, to the point that keeping compat 
 would be uglier?
 
 Or would it require some ugly static initialization of the pixfmt list? In 
 that case I guess it can be argued that it's simpler just to bump the 
 requirement.

Yes, that plus a considerable amount of ifdeffery in the code.
It will be ugly, but i also think it's worth keeping compatibility with at 
least 1.3.0

 
 // Martin
 ___
 libav-devel mailing list
 libav-devel@libav.org
 https://lists.libav.org/mailman/listinfo/libav-devel

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH v2 1/3] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag

2015-05-28 Thread James Almer

On 26/05/15 2:29 PM, James Almer wrote:
 Signed-off-by: James Almer jamr...@gmail.com
 ---
 No changes from last revision.
 
  doc/APIchanges  |  3 +++
  libavutil/cpu.c |  3 +++
  libavutil/cpu.h |  1 +
  libavutil/version.h |  4 ++--
  libavutil/x86/cpu.c | 17 ++---
  5 files changed, 23 insertions(+), 5 deletions(-)

Ping

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] libvpx: Support all pixel formats available in encoding and decoding

2015-05-28 Thread James Almer

On 28/05/15 9:11 AM, Vittorio Giovara wrote:
 On Wed, May 27, 2015 at 7:24 PM, James Almer jamr...@gmail.com wrote:
 As Luca said, require_pkg_config. If you use require, "vpx >= 1.4.0" is just 
 used as a name
 to report a failure. It does not check for that version.
 
 I swear I couldn't get it working with just require_pkg_config, thanks
 for showing how to do that in the next email.

For the record, the example i gave in the other email is only valid if the 
libvpx
requirement is bumped to 1.3.0 or newer. Otherwise, individual tests for each 
component
(like it's done right now) will still be needed.

 
 diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
 index 3440126..16af20c 100644
 --- a/libavcodec/avcodec.h
 +++ b/libavcodec/avcodec.h
 @@ -2702,6 +2702,10 @@ typedef struct AVCodecContext {
  #define FF_PROFILE_JPEG2000_DCINEMA_2K  3
  #define FF_PROFILE_JPEG2000_DCINEMA_4K  4

 +#define FF_PROFILE_VP9_00
 +#define FF_PROFILE_VP9_11
 +#define FF_PROFILE_VP9_22
 +#define FF_PROFILE_VP9_33

 Even if it's a simple change, credit where credit is due would be nice.
 
 credit to whom and for what?

https://git.videolan.org/?p=ffmpeg.git;a=commitdiff;h=079b7f6eacc09bc2813fc1ddc230ab05022b69c2
https://git.videolan.org/?p=ffmpeg.git;a=commitdiff;h=01e59d48ed1a41b88107ed1d4d56ae0cbcd1a60e
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] configure: don't enable tls protocols if network is disabled

2015-06-02 Thread James Almer

This was a regression introduced with d8ffb2055f0e0fcb5d025bab72eb19c2a886c125.

Signed-off-by: James Almer jamr...@gmail.com
---
 configure | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 30d6f18..2458adb 100755
--- a/configure
+++ b/configure
@@ -2215,9 +2215,10 @@ sctp_protocol_select=network
 srtp_protocol_select=rtp_protocol
 tcp_protocol_select=network
 tls_gnutls_protocol_deps=gnutls
+tls_gnutls_protocol_select=tcp_protocol
 tls_openssl_protocol_deps=openssl !tls_gnutls_protocol
+tls_openssl_protocol_select=tcp_protocol
 tls_protocol_deps_any=tls_gnutls_protocol tls_openssl_protocol
-tls_protocol_select=tcp_protocol
 udp_protocol_select=network
 unix_protocol_deps=sys_un_h
 unix_protocol_select=network
-- 
2.4.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/4] configure: Require LPDIRECT3DSURFACE9 for dxva2

2015-06-02 Thread James Almer

On 02/06/15 4:27 AM, Martin Storsjö wrote:
 On Mon, 1 Jun 2015, James Almer wrote:
 
 On 01/06/15 7:54 AM, Martin Storsjö wrote:
 This fixes dxva2 detection (i.e. correctly realizes that it isn't
 available) for WinRT, where dxva2api.h does exist, but these definitions
 are omitted (when targeting the API subsets).

 Ideally we should rather check for e.g. DXVA2_ConfigPictureDecode,
 but configure might fail to find that definition due to _WIN32_WINNT
 not being set to the right value during configure. (libavcodec/dxva2.h
 manually overrides the _WIN32_WINNT define.)

 Something like

 enabled dxva2api_h  check_type dxva2api.h DXVA2_ConfigPictureDecode 
 -D_WIN32_WINNT=0x0600 || disable dxva2api_h
 
 Thanks - I somehow missed that check_type can take other parameters to use 
 while compiling.
 
 Should work then. You can put it above the d3d11_cobj check (Which IMO 
 should be removed alongside the d3d11va_lib
 check until actual d3d11 support is added to avconv, for that matter).

 I see this patch was committed already, so up to you if you prefer the above 
 solution or not.
 
 This does sound better indeed (and I agree about removing the extra d3d11 
 things for avconv support which isn't there yet).
 
 Although I think it's a bit more straightforward to just add this as an 
 unconditional check_type call without intermixing it with enabling/disabling 
 dxva2api_h though.

Yeah, i realized after sending that email that you can probably just replace 
the check_type for LPDIRECT3DSURFACE9
with this one, and of course also the relevant dependency on dxva2_deps.

 
 // Martin
 ___
 libav-devel mailing list
 libav-devel@libav.org
 https://lists.libav.org/mailman/listinfo/libav-devel

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] D3D11va: add a Direct3D11 video decoder similar to DXVA2

2015-05-25 Thread James Almer

On 25/05/15 2:31 AM, Steve Lhomme wrote:
 On Sun, May 24, 2015 at 1:13 PM, Luca Barbato lu_z...@gentoo.org wrote:
 On 24/05/15 07:59, Steve Lhomme wrote:
 Any update on this patch ?


 If it works for you I'll merge it Monday. I do not have the means to test it
 directly, I guess.
 
 Yes, it works. Building may be tricky until my patches are merged into
 wine & mingw-w64, unless you build with the Microsoft SDK.

In addition to the mingw-w64 breakage i mentioned in another thread, this is
making the h264, hevc and other fate tests fail on msvc x86_32.

https://fate.libav.org/x86_32-msvc11-windows-native/20150525152900
https://fate.libav.org/x86_32-msvc12-windows-native/20150525155646

msvc x86_64 seems unaffected.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] configure: we don't need d3d11va_lib as avconv doesn't support it

2015-05-25 Thread James Almer

On 25/05/15 11:49 AM, Steve Lhomme wrote:
 ---
  configure | 5 -
  1 file changed, 5 deletions(-)
 
 diff --git a/configure b/configure
 index 18280b9..a9ecad1 100755
 --- a/configure
 +++ b/configure
 @@ -1555,7 +1555,6 @@ HAVE_LIST=
  atomics_native
  dos_paths
  d3d11_cobj
 -d3d11va_lib
  dxva2_lib
  libc_msvcrt
  libdc1394_1
 @@ -4618,10 +4617,6 @@ check_deps $CONFIG_LIST   \
 $HAVE_LIST \
 $ALL_COMPONENTS\
  
 -enabled_all d3d11va d3d11_cobj CoTaskMemFree 
 -prepend avconv_libs $($ldflags_filter -lole32) 
 -enable d3d11va_lib
 -
  enabled_all dxva2 CoTaskMemFree 
  prepend avconv_libs $($ldflags_filter -lole32) 
  enable dxva2_lib

You could also remove d3d11_cobj and its configure check, then.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] configure: we don't need d3d11va_lib as avconv doesn't support it

2015-05-25 Thread James Almer

On 25/05/15 8:24 PM, James Almer wrote:
 On 25/05/15 11:49 AM, Steve Lhomme wrote:
 ---
  configure | 5 -
  1 file changed, 5 deletions(-)

 diff --git a/configure b/configure
 index 18280b9..a9ecad1 100755
 --- a/configure
 +++ b/configure
 @@ -1555,7 +1555,6 @@ HAVE_LIST=
  atomics_native
  dos_paths
  d3d11_cobj
 -d3d11va_lib
  dxva2_lib
  libc_msvcrt
  libdc1394_1
 @@ -4618,10 +4617,6 @@ check_deps $CONFIG_LIST   \
 $HAVE_LIST \
 $ALL_COMPONENTS\
  
 -enabled_all d3d11va d3d11_cobj CoTaskMemFree 
 -prepend avconv_libs $($ldflags_filter -lole32) 
 -enable d3d11va_lib
 -
  enabled_all dxva2 CoTaskMemFree 
  prepend avconv_libs $($ldflags_filter -lole32) 
  enable dxva2_lib
 
 You could also remove d3d11_cobj and its configure check, then.

Actually no, don't remove the d3d11_cobj check. Repurpose it as it's the only 
check that actually makes
sure things will work: 
https://fate.libav.org/x86_64-mingw-w64-gcc-5.1/20150525105137/compile

CONFIG_D3D11VA, currently checked in libavcodec, is true if d3d11.h and dxva.h 
exist, but those existing
doesn't mean the needed functionality is there, as shown in the above FATE 
client using a recent mingw-w64
version.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/2] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag

2015-05-25 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
Updated with a new libavutil version after the d3d11 patch.

 doc/APIchanges  |  3 +++
 libavutil/cpu.c |  3 +++
 libavutil/cpu.h |  1 +
 libavutil/version.h |  4 ++--
 libavutil/x86/cpu.c | 17 ++---
 5 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 5d39ec6..2c443b0 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil: 2014-08-09
 
 API changes, most recent first:
 
+2015-xx-xx - xxx - lavu 54.14.0 - cpu.h
+  Add AV_CPU_FLAG_AVXSLOW.
+
 2015-xx-xx - xxx - lavc 56.23.0
   Add av_vda_default_init2.
 
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 4e8ef61..e24b9dd 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3)
 #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4)
 #define CPUFLAG_AVX  (AV_CPU_FLAG_AVX  | CPUFLAG_SSE42)
+#define CPUFLAG_AVXSLOW  (AV_CPU_FLAG_AVXSLOW  | CPUFLAG_AVX)
 #define CPUFLAG_XOP  (AV_CPU_FLAG_XOP  | CPUFLAG_AVX)
 #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
 #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
@@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s)
 { sse4.1  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4
 },.unit = flags },
 { sse4.2  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42   
 },.unit = flags },
 { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX 
 },.unit = flags },
+{ avxslow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW 
 },.unit = flags },
 { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP 
 },.unit = flags },
 { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3
 },.unit = flags },
 { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4
 },.unit = flags },
@@ -219,6 +221,7 @@ static const struct {
 { AV_CPU_FLAG_SSE4,  sse4.1 },
 { AV_CPU_FLAG_SSE42, sse4.2 },
 { AV_CPU_FLAG_AVX,   avx},
+{ AV_CPU_FLAG_AVXSLOW,   avxslow},
 { AV_CPU_FLAG_XOP,   xop},
 { AV_CPU_FLAG_FMA3,  fma3   },
 { AV_CPU_FLAG_FMA4,  fma4   },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 7ce..c9469b3 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -45,6 +45,7 @@
 #define AV_CPU_FLAG_SSE4 0x0100 /// Penryn SSE4.1 functions
 #define AV_CPU_FLAG_SSE420x0200 /// Nehalem SSE4.2 functions
 #define AV_CPU_FLAG_AVX  0x4000 /// AVX functions: requires OS 
support even if YMM registers aren't used
+#define AV_CPU_FLAG_AVXSLOW   0x800 /// AVX supported, but slow when 
using YMM registers (e.g. Bulldozer)
 #define AV_CPU_FLAG_XOP  0x0400 /// Bulldozer XOP functions
 #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions
 #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov
diff --git a/libavutil/version.h b/libavutil/version.h
index 13bb6f0..c3342cd 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -54,8 +54,8 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR 54
-#define LIBAVUTIL_VERSION_MINOR 13
-#define LIBAVUTIL_VERSION_MICRO  1
+#define LIBAVUTIL_VERSION_MINOR 14
+#define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 8be6d94..098ccf7 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void)
 if (ext_caps  (1  22))
 rval |= AV_CPU_FLAG_MMXEXT;
 
+if (!strncmp(vendor.c, AuthenticAMD, 12)) {
 /* Allow for selectively disabling SSE2 functions on AMD processors
with SSE2 support but not SSE4a. This includes Athlon64, some
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
@@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void)
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
so that SSE2 is used unless explicitly disabled by checking
AV_CPU_FLAG_SSE2SLOW. */
-if (!strncmp(vendor.c, AuthenticAMD, 12) 
-rval  AV_CPU_FLAG_SSE2  !(ecx  0x0040)) {
-rval |= AV_CPU_FLAG_SSE2SLOW;
+if (rval  AV_CPU_FLAG_SSE2  !(ecx  0x0040))
+rval |= AV_CPU_FLAG_SSE2SLOW;
+
+/* Similar to the above but for AVX functions on AMD processors.
+   This is necessary only for functions using YMM registers on 
Bulldozer
+   based CPUs as they lack 256-bits execution units. SSE/AVX functions
+   using XMM registers are always faster on them.
+   AV_CPU_FLAG_AVX

[libav-devel] [PATCH 2/2] x86: check for AV_CPU_FLAG_AVXSLOW where useful

2015-05-25 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
The FMA4 functions from libavresample's audio_mix need to be handled
differently. Disabling them if avxslow is true is pointless since no
CPU out there currently has FMA4 and a fast float execution unit. So
I'm thinking about duplicating them and doing:

FMA3 YMM/XMM for current Intel CPUs (Basically, renaming the existing
functions)
FMA4 XMM for current AMD stuff (Regardless of x86_32 or x86_64).

I'll see about implementing that in the coming days.

 libavcodec/x86/dcadsp_init.c   |  4 ++--
 libavcodec/x86/dct_init.c  |  2 +-
 libavcodec/x86/fft_init.c  |  2 +-
 libavfilter/x86/af_volume_init.c   |  2 +-
 libavresample/x86/audio_convert_init.c | 10 ++
 libavresample/x86/audio_mix_init.c | 10 ++
 libavresample/x86/dither_init.c|  4 ++--
 libavutil/x86/float_dsp_init.c |  2 +-
 libavutil/x86/lls_init.c   |  2 +-
 9 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 9acb818..8deb6d6 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -98,10 +98,10 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 if (EXTERNAL_SSE2(cpu_flags)) {
 s-synth_filter_float = synth_filter_sse2;
 }
-if (EXTERNAL_AVX(cpu_flags)) {
+if (EXTERNAL_AVX(cpu_flags)  !(cpu_flags  AV_CPU_FLAG_AVXSLOW)) {
 s-synth_filter_float = synth_filter_avx;
 }
-if (EXTERNAL_FMA3(cpu_flags)) {
+if (EXTERNAL_FMA3(cpu_flags)  !(cpu_flags  AV_CPU_FLAG_AVXSLOW)) {
 s-synth_filter_float = synth_filter_fma3;
 }
 #endif /* HAVE_YASM */
diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c
index 7bda5e8..660d118 100644
--- a/libavcodec/x86/dct_init.c
+++ b/libavcodec/x86/dct_init.c
@@ -34,6 +34,6 @@ av_cold void ff_dct_init_x86(DCTContext *s)
 s-dct32 = ff_dct32_float_sse;
 if (EXTERNAL_SSE2(cpu_flags))
 s-dct32 = ff_dct32_float_sse2;
-if (EXTERNAL_AVX(cpu_flags))
+if (EXTERNAL_AVX(cpu_flags)  !(cpu_flags  AV_CPU_FLAG_AVXSLOW))
 s-dct32 = ff_dct32_float_avx;
 }
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
index 7ca72c5..840f348 100644
--- a/libavcodec/x86/fft_init.c
+++ b/libavcodec/x86/fft_init.c
@@ -48,7 +48,7 @@ av_cold void ff_fft_init_x86(FFTContext *s)
 s-fft_calc= ff_fft_calc_sse;
 s-fft_permutation = FF_FFT_PERM_SWAP_LSBS;
 }
-if (EXTERNAL_AVX(cpu_flags)  s-nbits = 5) {
+if (EXTERNAL_AVX(cpu_flags)  !(cpu_flags  AV_CPU_FLAG_AVXSLOW)  
s-nbits = 5) {
 /* AVX for SB */
 s-imdct_half  = ff_imdct_half_avx;
 s-fft_calc= ff_fft_calc_avx;
diff --git a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c
index c59e0ed..f70bafa 100644
--- a/libavfilter/x86/af_volume_init.c
+++ b/libavfilter/x86/af_volume_init.c
@@ -52,7 +52,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol)
 vol-scale_samples = ff_scale_samples_s32_ssse3_atom;
 vol-samples_align = 4;
 }
-if (EXTERNAL_AVX(cpu_flags)) {
+if (EXTERNAL_AVX(cpu_flags)  !(cpu_flags  AV_CPU_FLAG_AVXSLOW)) {
 vol-scale_samples = ff_scale_samples_s32_avx;
 vol-samples_align = 8;
 }
diff --git a/libavresample/x86/audio_convert_init.c 
b/libavresample/x86/audio_convert_init.c
index d85ca84..1aab0f7 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -227,10 +227,12 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   6, 16, 4, SSE4, 
ff_conv_fltp_to_flt_6ch_sse4);
 }
 if (EXTERNAL_AVX(cpu_flags)) {
-ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
-  0, 32, 16, AVX, ff_conv_s32_to_flt_avx);
-ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
-  0, 32, 32, AVX, ff_conv_flt_to_s32_avx);
+if (!(cpu_flags  AV_CPU_FLAG_AVXSLOW)) {
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
+  0, 32, 16, AVX, 
ff_conv_s32_to_flt_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
+  0, 32, 32, AVX, 
ff_conv_flt_to_s32_avx);
+}
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   2, 16, 16, AVX, 
ff_conv_s16p_to_s16_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
diff --git a/libavresample/x86/audio_mix_init.c 
b/libavresample/x86/audio_mix_init.c
index 7fc530e..4fc2749 100644
--- a/libavresample/x86/audio_mix_init.c
+++ b/libavresample/x86/audio_mix_init.c
@@ -196,10 +196,12 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am

[libav-devel] [PATCH] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag

2015-05-22 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 doc/APIchanges  |  3 +++
 libavutil/cpu.c |  3 +++
 libavutil/cpu.h |  1 +
 libavutil/version.h |  4 ++--
 libavutil/x86/cpu.c | 17 ++---
 5 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 5d39ec6..b126364 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil: 2014-08-09
 
 API changes, most recent first:
 
+2015-xx-xx - xxx - lavu 54.13.0 - cpu.h
+  Add AV_CPU_FLAG_AVXSLOW.
+
 2015-xx-xx - xxx - lavc 56.23.0
   Add av_vda_default_init2.
 
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 4e8ef61..e24b9dd 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3)
 #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4)
 #define CPUFLAG_AVX  (AV_CPU_FLAG_AVX  | CPUFLAG_SSE42)
+#define CPUFLAG_AVXSLOW  (AV_CPU_FLAG_AVXSLOW  | CPUFLAG_AVX)
 #define CPUFLAG_XOP  (AV_CPU_FLAG_XOP  | CPUFLAG_AVX)
 #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
 #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
@@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s)
 { sse4.1  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4
 },.unit = flags },
 { sse4.2  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42   
 },.unit = flags },
 { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX 
 },.unit = flags },
+{ avxslow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW 
 },.unit = flags },
 { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP 
 },.unit = flags },
 { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3
 },.unit = flags },
 { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4
 },.unit = flags },
@@ -219,6 +221,7 @@ static const struct {
 { AV_CPU_FLAG_SSE4,  sse4.1 },
 { AV_CPU_FLAG_SSE42, sse4.2 },
 { AV_CPU_FLAG_AVX,   avx},
+{ AV_CPU_FLAG_AVXSLOW,   avxslow},
 { AV_CPU_FLAG_XOP,   xop},
 { AV_CPU_FLAG_FMA3,  fma3   },
 { AV_CPU_FLAG_FMA4,  fma4   },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 7ce..c9469b3 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -45,6 +45,7 @@
 #define AV_CPU_FLAG_SSE4 0x0100 /// Penryn SSE4.1 functions
 #define AV_CPU_FLAG_SSE420x0200 /// Nehalem SSE4.2 functions
 #define AV_CPU_FLAG_AVX  0x4000 /// AVX functions: requires OS 
support even if YMM registers aren't used
+#define AV_CPU_FLAG_AVXSLOW   0x800 /// AVX supported, but slow when 
using YMM registers (e.g. Bulldozer)
 #define AV_CPU_FLAG_XOP  0x0400 /// Bulldozer XOP functions
 #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions
 #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov
diff --git a/libavutil/version.h b/libavutil/version.h
index 9c45e0e..378f7b7 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -54,8 +54,8 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR 54
-#define LIBAVUTIL_VERSION_MINOR 12
-#define LIBAVUTIL_VERSION_MICRO  1
+#define LIBAVUTIL_VERSION_MINOR 13
+#define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 8be6d94..098ccf7 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void)
 if (ext_caps  (1  22))
 rval |= AV_CPU_FLAG_MMXEXT;
 
+if (!strncmp(vendor.c, AuthenticAMD, 12)) {
 /* Allow for selectively disabling SSE2 functions on AMD processors
with SSE2 support but not SSE4a. This includes Athlon64, some
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
@@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void)
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
so that SSE2 is used unless explicitly disabled by checking
AV_CPU_FLAG_SSE2SLOW. */
-if (!strncmp(vendor.c, AuthenticAMD, 12) 
-rval  AV_CPU_FLAG_SSE2  !(ecx  0x0040)) {
-rval |= AV_CPU_FLAG_SSE2SLOW;
+if (rval  AV_CPU_FLAG_SSE2  !(ecx  0x0040))
+rval |= AV_CPU_FLAG_SSE2SLOW;
+
+/* Similar to the above but for AVX functions on AMD processors.
+   This is necessary only for functions using YMM registers on 
Bulldozer
+   based CPUs as they lack 256-bits execution units. SSE/AVX functions
+   using XMM registers are always faster on them.
+   AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
+   used

[libav-devel] [PATCH v2 3/3] x86: check for AV_CPU_FLAG_AVXSLOW where useful

2015-05-26 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp_init.c   | 4 ++--
 libavcodec/x86/dct_init.c  | 2 +-
 libavcodec/x86/fft_init.c  | 2 +-
 libavfilter/x86/af_volume_init.c   | 2 +-
 libavresample/x86/audio_convert_init.c | 4 +++-
 libavresample/x86/audio_mix_init.c | 4 +++-
 libavresample/x86/dither_init.c| 4 ++--
 libavutil/x86/float_dsp_init.c | 2 +-
 libavutil/x86/lls_init.c   | 2 +-
 9 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 9acb818..7c2bec1 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -98,10 +98,10 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 if (EXTERNAL_SSE2(cpu_flags)) {
 s-synth_filter_float = synth_filter_sse2;
 }
-if (EXTERNAL_AVX(cpu_flags)) {
+if (EXTERNAL_AVX_FAST(cpu_flags)) {
 s-synth_filter_float = synth_filter_avx;
 }
-if (EXTERNAL_FMA3(cpu_flags)) {
+if (EXTERNAL_FMA3(cpu_flags)  !(cpu_flags  AV_CPU_FLAG_AVXSLOW)) {
 s-synth_filter_float = synth_filter_fma3;
 }
 #endif /* HAVE_YASM */
diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c
index 7bda5e8..ca9fbc7 100644
--- a/libavcodec/x86/dct_init.c
+++ b/libavcodec/x86/dct_init.c
@@ -34,6 +34,6 @@ av_cold void ff_dct_init_x86(DCTContext *s)
 s-dct32 = ff_dct32_float_sse;
 if (EXTERNAL_SSE2(cpu_flags))
 s-dct32 = ff_dct32_float_sse2;
-if (EXTERNAL_AVX(cpu_flags))
+if (EXTERNAL_AVX_FAST(cpu_flags))
 s-dct32 = ff_dct32_float_avx;
 }
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
index 7ca72c5..5c0273d 100644
--- a/libavcodec/x86/fft_init.c
+++ b/libavcodec/x86/fft_init.c
@@ -48,7 +48,7 @@ av_cold void ff_fft_init_x86(FFTContext *s)
 s-fft_calc= ff_fft_calc_sse;
 s-fft_permutation = FF_FFT_PERM_SWAP_LSBS;
 }
-if (EXTERNAL_AVX(cpu_flags)  s-nbits = 5) {
+if (EXTERNAL_AVX_FAST(cpu_flags)  s-nbits = 5) {
 /* AVX for SB */
 s-imdct_half  = ff_imdct_half_avx;
 s-fft_calc= ff_fft_calc_avx;
diff --git a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c
index c59e0ed..26605fb 100644
--- a/libavfilter/x86/af_volume_init.c
+++ b/libavfilter/x86/af_volume_init.c
@@ -52,7 +52,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol)
 vol-scale_samples = ff_scale_samples_s32_ssse3_atom;
 vol-samples_align = 4;
 }
-if (EXTERNAL_AVX(cpu_flags)) {
+if (EXTERNAL_AVX_FAST(cpu_flags)) {
 vol-scale_samples = ff_scale_samples_s32_avx;
 vol-samples_align = 8;
 }
diff --git a/libavresample/x86/audio_convert_init.c 
b/libavresample/x86/audio_convert_init.c
index d85ca84..ae6c319 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -226,11 +226,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 16, 4, SSE4, 
ff_conv_fltp_to_flt_6ch_sse4);
 }
-if (EXTERNAL_AVX(cpu_flags)) {
+if (EXTERNAL_AVX_FAST(cpu_flags)) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
   0, 32, 16, AVX, ff_conv_s32_to_flt_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
   0, 32, 32, AVX, ff_conv_flt_to_s32_avx);
+}
+if (EXTERNAL_AVX(cpu_flags)) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   2, 16, 16, AVX, 
ff_conv_s16p_to_s16_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
diff --git a/libavresample/x86/audio_mix_init.c 
b/libavresample/x86/audio_mix_init.c
index 7fc530e..e14a540 100644
--- a/libavresample/x86/audio_mix_init.c
+++ b/libavresample/x86/audio_mix_init.c
@@ -195,11 +195,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
   1, 2, 16, 8, SSE4, 
ff_mix_1_to_2_s16p_flt_sse4);
 }
-if (EXTERNAL_AVX(cpu_flags)) {
+if (EXTERNAL_AVX_FAST(cpu_flags)) {
 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
   2, 1, 32, 16, AVX, ff_mix_2_to_1_fltp_flt_avx);
 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
   1, 2, 32, 8, AVX, ff_mix_1_to_2_fltp_flt_avx);
+}
+if (EXTERNAL_AVX(cpu_flags)) {
 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
   1, 2, 16, 8, AVX, ff_mix_1_to_2_s16p_flt_avx);
 }
diff --git a/libavresample/x86/dither_init.c b/libavresample/x86

[libav-devel] [PATCH v2 1/3] x86/cpu: add AV_CPU_FLAG_AVXSLOW flag

2015-05-26 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
No changes from last revision.

 doc/APIchanges  |  3 +++
 libavutil/cpu.c |  3 +++
 libavutil/cpu.h |  1 +
 libavutil/version.h |  4 ++--
 libavutil/x86/cpu.c | 17 ++---
 5 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 5d39ec6..2c443b0 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil: 2014-08-09
 
 API changes, most recent first:
 
+2015-xx-xx - xxx - lavu 54.14.0 - cpu.h
+  Add AV_CPU_FLAG_AVXSLOW.
+
 2015-xx-xx - xxx - lavc 56.23.0
   Add av_vda_default_init2.
 
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 4e8ef61..e24b9dd 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3)
 #define CPUFLAG_SSE42(AV_CPU_FLAG_SSE42| CPUFLAG_SSE4)
 #define CPUFLAG_AVX  (AV_CPU_FLAG_AVX  | CPUFLAG_SSE42)
+#define CPUFLAG_AVXSLOW  (AV_CPU_FLAG_AVXSLOW  | CPUFLAG_AVX)
 #define CPUFLAG_XOP  (AV_CPU_FLAG_XOP  | CPUFLAG_AVX)
 #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
 #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
@@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s)
 { sse4.1  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4
 },.unit = flags },
 { sse4.2  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42   
 },.unit = flags },
 { avx , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX 
 },.unit = flags },
+{ avxslow , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW 
 },.unit = flags },
 { xop , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP 
 },.unit = flags },
 { fma3, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3
 },.unit = flags },
 { fma4, NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4
 },.unit = flags },
@@ -219,6 +221,7 @@ static const struct {
 { AV_CPU_FLAG_SSE4,  sse4.1 },
 { AV_CPU_FLAG_SSE42, sse4.2 },
 { AV_CPU_FLAG_AVX,   avx},
+{ AV_CPU_FLAG_AVXSLOW,   avxslow},
 { AV_CPU_FLAG_XOP,   xop},
 { AV_CPU_FLAG_FMA3,  fma3   },
 { AV_CPU_FLAG_FMA4,  fma4   },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 7ce..c9469b3 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -45,6 +45,7 @@
 #define AV_CPU_FLAG_SSE4 0x0100 /// Penryn SSE4.1 functions
 #define AV_CPU_FLAG_SSE420x0200 /// Nehalem SSE4.2 functions
 #define AV_CPU_FLAG_AVX  0x4000 /// AVX functions: requires OS 
support even if YMM registers aren't used
+#define AV_CPU_FLAG_AVXSLOW   0x800 /// AVX supported, but slow when 
using YMM registers (e.g. Bulldozer)
 #define AV_CPU_FLAG_XOP  0x0400 /// Bulldozer XOP functions
 #define AV_CPU_FLAG_FMA4 0x0800 /// Bulldozer FMA4 functions
 #define AV_CPU_FLAG_CMOV 0x1000 /// i686 cmov
diff --git a/libavutil/version.h b/libavutil/version.h
index 13bb6f0..c3342cd 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -54,8 +54,8 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR 54
-#define LIBAVUTIL_VERSION_MINOR 13
-#define LIBAVUTIL_VERSION_MICRO  1
+#define LIBAVUTIL_VERSION_MINOR 14
+#define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 8be6d94..098ccf7 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void)
 if (ext_caps  (1  22))
 rval |= AV_CPU_FLAG_MMXEXT;
 
+if (!strncmp(vendor.c, AuthenticAMD, 12)) {
 /* Allow for selectively disabling SSE2 functions on AMD processors
with SSE2 support but not SSE4a. This includes Athlon64, some
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
@@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void)
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
so that SSE2 is used unless explicitly disabled by checking
AV_CPU_FLAG_SSE2SLOW. */
-if (!strncmp(vendor.c, AuthenticAMD, 12) 
-rval  AV_CPU_FLAG_SSE2  !(ecx  0x0040)) {
-rval |= AV_CPU_FLAG_SSE2SLOW;
+if (rval  AV_CPU_FLAG_SSE2  !(ecx  0x0040))
+rval |= AV_CPU_FLAG_SSE2SLOW;
+
+/* Similar to the above but for AVX functions on AMD processors.
+   This is necessary only for functions using YMM registers on 
Bulldozer
+   based CPUs as they lack 256-bits execution units. SSE/AVX functions
+   using XMM registers are always faster on them.
+   AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so

[libav-devel] [PATCH v2 2/3] x86/cpu: add helper macros to check for slow cpuflags

2015-05-26 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 libavutil/cpu_internal.h | 12 
 libavutil/x86/cpu.h  | 18 ++
 2 files changed, 30 insertions(+)

diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 3bfe8a8..2e9b44b 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -24,8 +24,20 @@
 #define CPUEXT_SUFFIX(flags, suffix, cpuext)\
 (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
 
+#define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext)   \
+(HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext) && \
+ !((flags) & AV_CPU_FLAG_ ## cpuext ## SLOW))
+
+#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext)   \
+(HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext) && \
+ ((flags) & AV_CPU_FLAG_ ## cpuext ## SLOW))
+
 #define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
 
+#define CPUEXT_FAST(flags, cpuext) CPUEXT_SUFFIX_FAST(flags, , cpuext)
+
+#define CPUEXT_SLOW(flags, cpuext) CPUEXT_SUFFIX_SLOW(flags, , cpuext)
+
 int ff_get_cpu_flags_aarch64(void);
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 50da30e..0695436 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -32,11 +32,17 @@
 #define X86_MMXEXT(flags)   CPUEXT(flags, MMXEXT)
 #define X86_SSE(flags)  CPUEXT(flags, SSE)
 #define X86_SSE2(flags) CPUEXT(flags, SSE2)
+#define X86_SSE2_FAST(flags)CPUEXT_FAST(flags, SSE2)
+#define X86_SSE2_SLOW(flags)CPUEXT_SLOW(flags, SSE2)
 #define X86_SSE3(flags) CPUEXT(flags, SSE3)
+#define X86_SSE3_FAST(flags)CPUEXT_FAST(flags, SSE3)
+#define X86_SSE3_SLOW(flags)CPUEXT_SLOW(flags, SSE3)
 #define X86_SSSE3(flags)CPUEXT(flags, SSSE3)
 #define X86_SSE4(flags) CPUEXT(flags, SSE4)
 #define X86_SSE42(flags)CPUEXT(flags, SSE42)
 #define X86_AVX(flags)  CPUEXT(flags, AVX)
+#define X86_AVX_FAST(flags) CPUEXT_FAST(flags, AVX)
+#define X86_AVX_SLOW(flags) CPUEXT_SLOW(flags, AVX)
 #define X86_XOP(flags)  CPUEXT(flags, XOP)
 #define X86_FMA3(flags) CPUEXT(flags, FMA3)
 #define X86_FMA4(flags) CPUEXT(flags, FMA4)
@@ -48,11 +54,17 @@
 #define EXTERNAL_MMXEXT(flags)  CPUEXT_SUFFIX(flags, _EXTERNAL, MMXEXT)
 #define EXTERNAL_SSE(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE)
 #define EXTERNAL_SSE2(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE2)
+#define EXTERNAL_SSE2_FAST(flags)   CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE2)
+#define EXTERNAL_SSE2_SLOW(flags)   CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE2)
 #define EXTERNAL_SSE3(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSE3_FAST(flags)   CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE3)
+#define EXTERNAL_SSE3_SLOW(flags)   CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE3)
 #define EXTERNAL_SSSE3(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, SSSE3)
 #define EXTERNAL_SSE4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4)
 #define EXTERNAL_SSE42(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
 #define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
+#define EXTERNAL_AVX_FAST(flags)CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, AVX)
+#define EXTERNAL_AVX_SLOW(flags)CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX)
 #define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
 #define EXTERNAL_FMA3(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3)
 #define EXTERNAL_FMA4(flags)CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
@@ -64,11 +76,17 @@
 #define INLINE_MMXEXT(flags)CPUEXT_SUFFIX(flags, _INLINE, MMXEXT)
 #define INLINE_SSE(flags)   CPUEXT_SUFFIX(flags, _INLINE, SSE)
 #define INLINE_SSE2(flags)  CPUEXT_SUFFIX(flags, _INLINE, SSE2)
+#define INLINE_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE2)
+#define INLINE_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE2)
 #define INLINE_SSE3(flags)  CPUEXT_SUFFIX(flags, _INLINE, SSE3)
+#define INLINE_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE3)
+#define INLINE_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE3)
 #define INLINE_SSSE3(flags) CPUEXT_SUFFIX(flags, _INLINE, SSSE3)
 #define INLINE_SSE4(flags)  CPUEXT_SUFFIX(flags, _INLINE, SSE4)
 #define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42)
 #define INLINE_AVX(flags)   CPUEXT_SUFFIX(flags, _INLINE, AVX)
+#define INLINE_AVX_FAST(flags)  CPUEXT_SUFFIX_FAST(flags, _INLINE, AVX)
+#define INLINE_AVX_SLOW(flags)  CPUEXT_SUFFIX_SLOW(flags, _INLINE, AVX)
 #define INLINE_XOP(flags)   CPUEXT_SUFFIX(flags, _INLINE, XOP)
 #define INLINE_FMA3(flags)  CPUEXT_SUFFIX(flags, _INLINE, FMA3)
 #define INLINE_FMA4(flags)  CPUEXT_SUFFIX(flags, _INLINE, FMA4)
-- 
2.4.1

Re: [libav-devel] [PATCH 8/8] hevcdsp: add x86 SIMD for MC

2015-08-19 Thread James Almer

On 19/08/15 4:43 PM, Anton Khirnov wrote:
 ---
  libavcodec/hevc.c |   6 +-
  libavcodec/hevc.h |   2 +-
  libavcodec/hevcdsp.c  |  24 +-
  libavcodec/hevcdsp.h  |   5 +-
  libavcodec/hevcdsp_template.c |   8 +-
  libavcodec/x86/Makefile   |   3 +-
  libavcodec/x86/hevc_mc.asm| 816 
 ++
  libavcodec/x86/hevcdsp_init.c | 405 +
  8 files changed, 1258 insertions(+), 11 deletions(-)
  create mode 100644 libavcodec/x86/hevc_mc.asm

I'm getting segmentation faults with quite a few samples.
For example http://www.elecard.com/assets/files/other/clips/bbb_1080p_c.ts
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 8/8] hevcdsp: add x86 SIMD for MC

2015-08-19 Thread James Almer

On 19/08/15 8:23 PM, Ronald S. Bultje wrote:
 Hi,
 
 On Wed, Aug 19, 2015 at 6:34 PM, James Almer jamr...@gmail.com wrote:
 
 On 19/08/15 4:43 PM, Anton Khirnov wrote:
 ---
  libavcodec/hevc.c |   6 +-
  libavcodec/hevc.h |   2 +-
  libavcodec/hevcdsp.c  |  24 +-
  libavcodec/hevcdsp.h  |   5 +-
  libavcodec/hevcdsp_template.c |   8 +-
  libavcodec/x86/Makefile   |   3 +-
  libavcodec/x86/hevc_mc.asm| 816
 ++
  libavcodec/x86/hevcdsp_init.c | 405 +
  8 files changed, 1258 insertions(+), 11 deletions(-)
  create mode 100644 libavcodec/x86/hevc_mc.asm

 I'm getting segmentation faults with quite a few of samples.
 For example http://www.elecard.com/assets/files/other/clips/bbb_1080p_c.ts
 
 
 So, at the risk of godwin, why was this reimplemented from scratch, rather
 than basing it on what ffmpeg has? How could this possibly be an advantage
 to our users?

Or OpenHEVC for that matter, which is the source of almost every hevc asm
optimization, x86 or otherwise, and a project that afaik branched off libav.

 
 Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] h264: Fix faulty call to avpriv_request_sample

2015-08-21 Thread James Almer

On 21/08/15 8:43 PM, Luca Barbato wrote:
 Broken in f9ab4fe1f7c1e9d410ca5ee2c9ff8d2892aad068
 ---
 
 Sorry.
 
  libavcodec/h264_sei.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c
 index 361d4de..ddf1b6f 100644
 --- a/libavcodec/h264_sei.c
 +++ b/libavcodec/h264_sei.c
 @@ -171,7 +171,7 @@ static int 
 decode_registered_user_data_closed_caption(H264Context *h, int size)
  }
  } else {
  int i;
 -avpriv_request_sample(Subtitles with data type 0x%02x,
 +avpriv_request_sample(h-avctx, Subtitles with data type 0x%02x,
user_data_type_code);
  for (i = 0; i  size - 1; i++)
  skip_bits(h-gb, 8);
 --
 2.5.0

Looks good.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC

2015-08-21 Thread James Almer

On 21/08/15 4:19 AM, Anton Khirnov wrote:
 +
 +add   dstq, dststrideq
 +add   srcq, srcstrideq
 +
 +%assign i (i + 1)
 +%endrep
 +
 +dec heightq

This and every other case should be heightd. There's no guarantee the upper
32 bits of the register will be zero on every x86_64 target.
This is the source of the crashes I was getting.

 +jg .loop
 +RET
 +%endmacro
 +
 +INIT_XMM sse2
 +GET_PIXELS 4,  8, 1
 +GET_PIXELS 8,  8, 1
 +GET_PIXELS 12, 8, 3
 +GET_PIXELS 16, 8, 2
 +GET_PIXELS 24, 8, 3
 +GET_PIXELS 32, 8, 3
 +GET_PIXELS 48, 8, 3
 +GET_PIXELS 64, 8, 3
 +
 +GET_PIXELS 4,  10, 1
 +GET_PIXELS 8,  10, 1
 +GET_PIXELS 12, 10, 3
 +GET_PIXELS 16, 10, 2
 +GET_PIXELS 24, 10, 3
 +GET_PIXELS 32, 10, 3
 +GET_PIXELS 48, 10, 3
 +GET_PIXELS 64, 10, 3
 +
 +; hevc_qpel_h/v_w_8(int16_t *dst, ptrdiff_t dststride,
 +; uint8_t *src, ptrdiff_t srcstride,
 +; int height, int mx, int my, int *mcbuffer)
 +
 +; 8-bit qpel interpolation
 +; %1: block width
 +; %2: 0 - horizontal; 1 - vertical
 +%macro QPEL_8 2
 +%if %2
 +%define postfixv
 +%define mvfrac myq

Same here and below the else, rename this to mvfracq and add a mvfracd.

 +%define pixstride  srcstrideq
 +%define pixstride3 sstride3q
 +%define src_m3 srcm3q
 +%else
 +%define postfixh
 +%define mvfrac mxq
 +%define pixstride  1
 +%define pixstride3 3
 +%define src_m3 (srcq - 3)
 +%endif
 +
 +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, dststride, 
 src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg
 +%if %2
 +and   mvfrac, 0x3
 +%endif
 +dec   mvfrac
 +shl   mvfrac, 4

Use mvfracd on these three, it will clear the high bits for the mova below.

 +lea   coeffsregq, [hevc_qpel_coeffs8]
 +mova  m0, [coeffsregq + mvfrac]

Then use mvfracq here. Replicate this in every function, of course.

 +
 +%macro PUT_WEIGHTED_PRED 3
 +%if %1
 +cglobal hevc_put_weighted_pred_avg_ %+ %2 %+ _ %+ %3, 11, 11, 8, denom, 
 weight0, weight1, offset0, offset1, dst, dststride, src0, src1, srcstride, 
 height
 +%else
 +cglobal hevc_put_weighted_pred_ %+ %2 %+ _ %+ %3, 8, 8, 8, denom, weight0, 
 offset0, dst, dststride, src0, srcstride, height
 +%endif
 +and heightq,0x7fff

You should be able to remove this after the above changes.

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [libav-commits] h264: Discard currently unsupported registered sei

2015-08-21 Thread James Almer

 Module: libav
 Branch: master
 Commit: f9ab4fe1f7c1e9d410ca5ee2c9ff8d2892aad068

 Author:John Högberg john.hogberg at ericsson.com
 Committer: Luca Barbato lu_zero at gentoo.org
 Date:  Fri Aug  7 19:30:38 2015 +

 h264: Discard currently unsupported registered sei

 Signed-off-by: Luca Barbato lu_zero at gentoo.org

 ---

  libavcodec/h264_sei.c |6 ++
  1 file changed, 6 insertions(+)

 diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c
 index 8b07682..361d4de 100644
 --- a/libavcodec/h264_sei.c
 +++ b/libavcodec/h264_sei.c
 @@ -169,6 +169,12 @@ static int 
 decode_registered_user_data_closed_caption(H264Context *h, int size)
  skip_bits(h-gb, 8);   // marker_bits
  }
  }
 +} else {
 +int i;
 +avpriv_request_sample(Subtitles with data type 0x%02x,

This should be

avpriv_request_sample(h->avctx, "Subtitles with data type 0x%02x",

Or similar. FATE is almost 50% red by now because of this...
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC

2015-08-22 Thread James Almer

On 21/08/15 4:19 AM, Anton Khirnov wrote:
 +%macro PUT_WEIGHTED_PRED 3
 +%if %1
 +cglobal hevc_put_weighted_pred_avg_ %+ %2 %+ _ %+ %3, 11, 11, 8, denom, 
 weight0, weight1, offset0, offset1, dst, dststride, src0, src1, srcstride, 
 height
 +%else
 +cglobal hevc_put_weighted_pred_ %+ %2 %+ _ %+ %3, 8, 8, 8, denom, weight0, 
 offset0, dst, dststride, src0, srcstride, height
 +%endif
 +and heightq,0x7fff
 +
 +add denomq, 14 + %1 - %3
 +movqm0, denomq

denom is a uint8_t. This should be

add denomd, 14 + %1 - %3
movdm0, denomd

I don't think doing a movzx denomd, denomb to clear bits 8 to 31 is necessary,
so the above should suffice.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC

2015-08-22 Thread James Almer

On 22/08/15 1:16 PM, Anton Khirnov wrote:
 +%macro QPEL_8 2
 +%if %2
 +%define postfixv
 +%define mvfrac myq

 Same here and below the else, rename this to mvfracq and add a mvfracd.

 +%define pixstride  srcstrideq
 +%define pixstride3 sstride3q
 +%define src_m3 srcm3q
 +%else
 +%define postfixh
 +%define mvfrac mxq
 +%define pixstride  1
 +%define pixstride3 3
 +%define src_m3 (srcq - 3)
 +%endif
 +
 +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, dststride, 
 src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg

This should be 7, 10, 7. Otherwise you're loading sstride3 from the stack as if
it were a function argument.
Ideally though, for vertical you'd use 5, 9, 7 and then manually load either mx
or my instead of both, saving one register, or even 5, 8, 7, since coeffsreg and
mvfrac are only used during init, and you can easily reuse one of those two
registers for sstride3 or srcm3.
You can also push it down to 4, 7, 7 if you manually load height before or after
the SPLATWs and reuse the regs for coeffsreg and mvfrac. As a plus, this would
make the functions work with x86_32.

For horizontal you don't even need sstride3 or srcm3, so you definitely should
declare and use fewer registers.

Didn't check other functions but I'm sure similar optimizations can be done.

 +%if %2
 +and   mvfrac, 0x3
 +%endif
 +dec   mvfrac
 +shl   mvfrac, 4

 Use mvfracd on these three, it will clear the high bits for the mova below.
 
 anding the whole register with 3/7 should also work fine, with less
 clutter.

The "and mvfrac, 0x3" is only in the ff_hevc_qpel_v_* functions, not in
ff_hevc_qpel_h_*. It's the same with the "and mvfrac, 0x7" cases below. You need
to use the d suffix instead of q on the register names to make sure the high
bits are cleared.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] hevcdsp: add x86 SIMD for MC

2015-08-23 Thread James Almer

On 23/08/15 3:27 PM, Anton Khirnov wrote:
 Quoting James Almer (2015-08-22 23:58:41)
 On 22/08/15 1:16 PM, Anton Khirnov wrote:
 +%macro QPEL_8 2
 +%if %2
 +%define postfixv
 +%define mvfrac myq

 Same here and below the else, rename this to mvfracq and add a mvfracd.

 +%define pixstride  srcstrideq
 +%define pixstride3 sstride3q
 +%define src_m3 srcm3q
 +%else
 +%define postfixh
 +%define mvfrac mxq
 +%define pixstride  1
 +%define pixstride3 3
 +%define src_m3 (srcq - 3)
 +%endif
 +
 +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, 
 dststride, src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg

 This should be 7, 10, 7, Otherwise you're loading sstride3 from stack as if 
 it were
 a function argument.
 Ideally though, for vertical you'd use 5, 9, 7 then manually load either mx 
 or my
 instead of both, saving one register, or even 5, 8, 7, since coeffsreg and 
 mvfrac
 are only used during init, and you can easily reuse one of those two 
 registers for
 sstride3 or srcm3.
 You can also push it down to 4, 7, 7 if you manually load height before or 
 after
 the SPLATWs and reuse the regs for coeffsreg and mvfrac. As a plus, this 
 would make
 the functions work with x86_32.

 For horizontal you don't even need sstride3 or srcm3, so you definitely 
 should
 declare and use less registers.

 Didn't check other functions but I'm sure similar optimizations can be done.

 +%if %2
 +and   mvfrac, 0x3
 +%endif
 +dec   mvfrac
 +shl   mvfrac, 4

 Use mvfracd on these three, it will clear the high bits for the mova below.

 anding the whole register with 3/7 should also work fine, with less
 clutter.

 and mvfrac, 0x3 is only in ff_hevc_qpel_v_* functions, but not 
 ff_hevc_qpel_h_*.
 It's the same with the and mvfrac, 0x7 cases below.
 
 Sure, I meant to change the code so it's done in both paths.

It's not necessary. Just use the 32bit gprs.

 You need to use the d suffix
 instead of q on the register names to make sure the high bits are cleared.
 
 Eh? Perhaps I'm misunderstanding something, but I'd expect that using d
 here would do exactly the opposite and keep the random data in the high bits.

No, using the d suffix to write a GPR on x86_64 will clear the high bits (32 to
63), in the same way that using VEX-coded instructions to write xmm registers
will clear bits 128 to 255 of the corresponding ymm registers.

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 8/8] hevcdsp: add x86 SIMD for MC

2015-08-20 Thread James Almer

On 20/08/15 3:35 AM, Anton Khirnov wrote:
 Quoting James Almer (2015-08-20 00:34:58)
 On 19/08/15 4:43 PM, Anton Khirnov wrote:
 ---
  libavcodec/hevc.c |   6 +-
  libavcodec/hevc.h |   2 +-
  libavcodec/hevcdsp.c  |  24 +-
  libavcodec/hevcdsp.h  |   5 +-
  libavcodec/hevcdsp_template.c |   8 +-
  libavcodec/x86/Makefile   |   3 +-
  libavcodec/x86/hevc_mc.asm| 816 
 ++
  libavcodec/x86/hevcdsp_init.c | 405 +
  8 files changed, 1258 insertions(+), 11 deletions(-)
  create mode 100644 libavcodec/x86/hevc_mc.asm

 I'm getting segmentation faults with quite a few of samples.
 For example http://www.elecard.com/assets/files/other/clips/bbb_1080p_c.ts
 
 Cannot reproduce here. Can you give me more details (system, where
 exactly does it crash, etc.)?
 

Mingw-w64 GCC 5.2.0. It also crashes with checkasm after patch 7/8, but in a
different place.
With checkasm I get:

Program received signal SIGSEGV, Segmentation fault.
0x0046ae00 in put_hevc_qpel_pixels_4_8 ()
at /home/jamrial/libav/libavcodec/hevcdsp_template.c:41
41  }
(gdb) disass $pc-32,$pc+32
Dump of assembler code from 0x46ade0 to 0x46ae20:
   0x0046ade0 put_hevc_qpel_pixels_4_8+0: mov0x28(%rsp),%r11d
   0x0046ade5 put_hevc_qpel_pixels_4_8+5: xor%r10d,%r10d
   0x0046ade8 put_hevc_qpel_pixels_4_8+8: and
$0xfffe,%rdx
   0x0046adec put_hevc_qpel_pixels_4_8+12:test   %r11d,%r11d
   0x0046adef put_hevc_qpel_pixels_4_8+15:jle0x46ae3c 
put_hevc_qpel_pixels_4_8+92
   0x0046adf1 put_hevc_qpel_pixels_4_8+17:nopl   0x0(%rax,%rax,1)
   0x0046adf6 put_hevc_qpel_pixels_4_8+22:nopw   
%cs:0x0(%rax,%rax,1)
= 0x0046ae00 put_hevc_qpel_pixels_4_8+32:movzbl (%r8),%eax
   0x0046ae04 put_hevc_qpel_pixels_4_8+36:inc%r10d
   0x0046ae07 put_hevc_qpel_pixels_4_8+39:shl$0x6,%eax
   0x0046ae0a put_hevc_qpel_pixels_4_8+42:mov%ax,(%rcx)
   0x0046ae0d put_hevc_qpel_pixels_4_8+45:movzbl 0x1(%r8),%eax
   0x0046ae12 put_hevc_qpel_pixels_4_8+50:shl$0x6,%eax
   0x0046ae15 put_hevc_qpel_pixels_4_8+53:mov%ax,0x2(%rcx)
   0x0046ae19 put_hevc_qpel_pixels_4_8+57:movzbl 0x2(%r8),%eax
   0x0046ae1e put_hevc_qpel_pixels_4_8+62:shl$0x6,%eax
End of assembler dump.
(gdb) info all-registers
rax0x2c80   11392
rbx0xed56bb2dcb3c7736   -1344681633365854410
rcx0xdeadbeef00224c50   -2401053092609897392
rdx0xdeadbeef0020   -2401053092612145120
rsi0x75b6ba21077c48ad   8482171599221180589
rdi0x21f86d66c8ca00ce   2447826685698638030
rbp0x8bda43d3fd1a7e06   0x8bda43d3fd1a7e06
rsp0x2192b8 0x2192b8
r8 0xdeadbeef00219af3   -2401053092609942797
r9 0xdeadbeef0010   -2401053092612145136
r100x1  1
r110x10 16
r120xb64a9c9e5d318408   -5311260606547786744
r130xdf9a54b303f1d3a3   -2334460328996121693
r140x4a75479abd64e097   5365273261009854615
r150x249214109d5d1c88   2635190793557318792
rip0x46ae00 0x46ae00 put_hevc_qpel_pixels_4_8+32



With bbb_1080p_c.ts I get:

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 1044.0x6ac]
0x00a91391 in ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop ()
(gdb) disass $pc-32,$pc+32
Dump of assembler code from 0xa91371 to 0xa913b1:
   0x00a91371 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+31:  
pop%rax
   0x00a91372 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+32:  
adc%ah,0x41(%rsi)
   0x00a91375 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+35:  
paddsw 0x10(%rcx),%mm3
   0x00a91379 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+39:  
paddsw %xmm0,%xmm3
   0x00a9137d ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+43:  
psraw  $0x7,%xmm3
   0x00a91382 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+48:  
packuswb %xmm3,%xmm3
   0x00a91386 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+52:  
movq   %xmm3,0x8(%rcx)
   0x00a9138b ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+57:  
movdqa 0x20(%r8),%xmm3
= 0x00a91391 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+63:  
paddsw 0x20(%r9),%xmm3
   0x00a91397 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+69:  
paddsw %xmm0,%xmm3
   0x00a9139b ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+73:  
psraw  $0x7,%xmm3
   0x00a913a0 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+78:  
packuswb %xmm3,%xmm3
   0x00a913a4 ff_hevc_put_unweighted_pred_avg_64_8_sse2.loop+82:  
movq   %xmm3,0x10(%rcx)
   0x00a913a9

[libav-devel] [PATCH] rtmpproto: free hmac context properly

2015-07-29 Thread James Almer

Signed-off-by: James Almer jamr...@gmail.com
---
 libavformat/rtmpproto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 1db7495..ec4b0e7 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -971,7 +971,7 @@ int ff_rtmp_calc_digest(const uint8_t *src, int len, int 
gap,
 }
 av_hmac_final(hmac, dst, 32);
 
-av_free(hmac);
+av_hmac_free(hmac);
 
 return 0;
 }
-- 
2.5.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

1 2 3 4 >

1 - 100 of 362 matches

Mail list logo