The NEON versions are 4-5 times faster than the C code, giving a 5% overall speedup on bourne.rvmb.
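
The kernels vectorise RV40's 6-tap quarter-pel lowpass filter. For
reviewers, this is the scalar operation each lane computes (a sketch
mirroring the RV40_LOWPASS template in libavcodec/rv40dsp.c; the helper
name is illustrative):

    /* One output pixel of the RV40 lowpass filter; C1/C2 select the
     * quarter-pel phase, SHIFT is 5 or 6. */
    static uint8_t lowpass_pixel(const uint8_t *src, int C1, int C2, int SHIFT)
    {
        int v = src[-2] + src[3] - 5 * (src[-1] + src[2])
                + C1 * src[0] + C2 * src[1] + (1 << (SHIFT - 1));
        v >>= SHIFT;
        return v < 0 ? 0 : v > 255 ? 255 : v;  /* matches vqrshrun's saturation */
    }

In the NEON code the shifted taps come from vext, the multiplies are
vmla/vmls against d0 = {5, C1, C2}, and the rounding, shift and clip
collapse into a single vqrshrun.s16.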
---
libavcodec/arm/rv40dsp_init_neon.c | 175 ++++++++++++++++++++++++++++++++++++
libavcodec/arm/rv40dsp_neon.S | 175 ++++++++++++++++++++++++++++++++++++
2 files changed, 350 insertions(+), 0 deletions(-)
diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c
index b976e7b..3efb4c4 100644
--- a/libavcodec/arm/rv40dsp_init_neon.c
+++ b/libavcodec/arm/rv40dsp_init_neon.c
@@ -24,6 +24,133 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/rv34dsp.h"
+void ff_put_rv40_qpel8_h_lowpass_s5_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int h, const int C1, const int C2);
+void ff_avg_rv40_qpel8_h_lowpass_s5_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int h, const int C1, const int C2);
+void ff_put_rv40_qpel8_h_lowpass_s6_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int h, const int C1, const int C2);
+void ff_avg_rv40_qpel8_h_lowpass_s6_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int h, const int C1, const int C2);
+
+void ff_put_rv40_qpel8_v_lowpass_s5_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int C1, const int C2);
+void ff_avg_rv40_qpel8_v_lowpass_s5_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int C1, const int C2);
+void ff_put_rv40_qpel8_v_lowpass_s6_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int C1, const int C2);
+void ff_avg_rv40_qpel8_v_lowpass_s6_neon(uint8_t *dst, uint8_t *src, int dstStride,
+                                         int srcStride, const int C1, const int C2);
+
+#define RV40_LOWPASS16(OPNAME, SHIFT) \
+static void ff_ ## OPNAME ## rv40_qpel16_v_lowpass ## SHIFT(uint8_t *dst, uint8_t *src,\
+                                                            int dstStride, int srcStride,\
+                                                            const int C1, const int C2){\
+    ff_ ## OPNAME ## rv40_qpel8_v_lowpass ## SHIFT(dst  , src  , dstStride, srcStride, C1, C2);\
+    ff_ ## OPNAME ## rv40_qpel8_v_lowpass ## SHIFT(dst+8, src+8, dstStride, srcStride, C1, C2);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    ff_ ## OPNAME ## rv40_qpel8_v_lowpass ## SHIFT(dst  , src  , dstStride, srcStride, C1, C2);\
+    ff_ ## OPNAME ## rv40_qpel8_v_lowpass ## SHIFT(dst+8, src+8, dstStride, srcStride, C1, C2);\
+}\
+\
+static void ff_ ## OPNAME ## rv40_qpel16_h_lowpass ## SHIFT(uint8_t *dst, uint8_t *src,\
+                                                            int dstStride, int srcStride,\
+                                                            const int h, const int C1, const int C2){\
+    ff_ ## OPNAME ## rv40_qpel8_h_lowpass ## SHIFT(dst  , src  , dstStride, srcStride, 8,   C1, C2);\
+    ff_ ## OPNAME ## rv40_qpel8_h_lowpass ## SHIFT(dst+8, src+8, dstStride, srcStride, 8,   C1, C2);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    ff_ ## OPNAME ## rv40_qpel8_h_lowpass ## SHIFT(dst  , src  , dstStride, srcStride, h-8, C1, C2);\
+    ff_ ## OPNAME ## rv40_qpel8_h_lowpass ## SHIFT(dst+8, src+8, dstStride, srcStride, h-8, C1, C2);\
+}\
+\
+
+RV40_LOWPASS16(put_, _s5_neon)
+RV40_LOWPASS16(avg_, _s5_neon)
+RV40_LOWPASS16(put_, _s6_neon)
+RV40_LOWPASS16(avg_, _s6_neon)
+
+
+#define RV40_MC(OPNAME, SIZE) \
+static void OPNAME ## rv40_qpel ## SIZE ## _mc10_neon(uint8_t *dst, uint8_t *src, int stride){\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _h_lowpass_s6_neon(dst, src, stride, stride, SIZE, 52, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc30_neon(uint8_t *dst, uint8_t *src, int stride){\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _h_lowpass_s6_neon(dst, src, stride, stride, SIZE, 20, 52);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc01_neon(uint8_t *dst, uint8_t *src, int stride){\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s6_neon(dst, src, stride, stride, 52, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc11_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s6_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s6_neon(dst, full_mid, stride, SIZE, 52, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc21_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s5_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s6_neon(dst, full_mid, stride, SIZE, 52, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc31_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s6_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s6_neon(dst, full_mid, stride, SIZE, 52, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc12_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s6_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s5_neon(dst, full_mid, stride, SIZE, 20, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc22_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s5_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s5_neon(dst, full_mid, stride, SIZE, 20, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc32_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s6_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s5_neon(dst, full_mid, stride, SIZE, 20, 20);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc03_neon(uint8_t *dst, uint8_t *src, int stride){\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s6_neon(dst, src, stride, stride, 20, 52);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc13_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s6_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s6_neon(dst, full_mid, stride, SIZE, 20, 52);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc23_neon(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid = full + SIZE*2;\
+    ff_put_rv40_qpel ## SIZE ## _h_lowpass_s5_neon(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20);\
+    ff_ ## OPNAME ## rv40_qpel ## SIZE ## _v_lowpass_s6_neon(dst, full_mid, stride, SIZE, 20, 52);\
+}\
+\
+
+RV40_MC(put_, 8)
+RV40_MC(put_, 16)
+RV40_MC(avg_, 8)
+RV40_MC(avg_, 16)
+
+
void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
@@ -38,6 +165,54 @@ void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{
+ c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_neon;
+ c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_neon;
+ c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_neon;
+ c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_neon;
+ c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_neon;
+ c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_neon;
+ c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_neon;
+ c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_neon;
+ c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_neon;
+ c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_neon;
+ c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_neon;
+ c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_neon;
+ c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_neon;
+ c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_neon;
+ c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_neon;
+ c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_neon;
+ c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_neon;
+ c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_neon;
+ c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_neon;
+ c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_neon;
+ c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_neon;
+ c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_neon;
+ c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_neon;
+ c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_neon;
+ c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_neon;
+ c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_neon;
+ c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_neon;
+ c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_neon;
+ c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_neon;
+ c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_neon;
+ c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_neon;
+ c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_neon;
+ c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_neon;
+ c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_neon;
+ c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_neon;
+ c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_neon;
+ c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_neon;
+ c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_neon;
+ c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_neon;
+ c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_neon;
+ c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_neon;
+ c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_neon;
+ c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_neon;
+ c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_neon;
+ c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_neon;
+ c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_neon;
+ c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_neon;
+ c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_neon;
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index 181c491..deeb7b8 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -20,6 +20,181 @@
*/
#include "asm.S"
+#include "neon_transpose.S"
+
+
+.macro qpel_lowpass r0, r1, rc, shift
+        @ \r0 = src[-2..5], \r1 = the following 8 pixels
+ vext.8 d24, \r0, \r1, #5 @ src[ 3]
+ vext.8 d25, \r0, \r1, #1 @ src[-1]
+ vext.8 d26, \r0, \r1, #4 @ src[ 2]
+ vext.8 d27, \r0, \r1, #2 @ src[ 0]
+ vext.8 d28, \r0, \r1, #3 @ src[ 1]
+ vaddl.u8 q8, \r0, d24
+ vaddl.u8 q9, d25, d26
+ vmls.s16 q8, q9, \rc[0]
+ vmovl.u8 q10, d27
+ vmovl.u8 q11, d28
+ vmla.s16 q8, q10, \rc[1]
+ vmla.s16 q8, q11, \rc[2]
+ vqrshrun.s16 \r0, q8, #\shift
+.endm
+
+.macro qpel_lowpass_x2 r0, r1, r2, r3, rc, shift
+ vext.8 d24, \r0, \r1, #5 @ src[ 3]
+ vext.8 d25, \r0, \r1, #1 @ src[-1]
+ vext.8 d26, \r0, \r1, #4 @ src[ 2]
+ vext.8 d29, \r0, \r1, #2 @ src[ 0]
+ vext.8 d28, \r0, \r1, #3 @ src[ 1]
+ vaddl.u8 q8, \r0, d24
+ vaddl.u8 q9, d25, d26
+ vmls.s16 q8, q9, \rc[0]
+ vext.8 \r0, \r2, \r3, #5 @ src[ 3]
+ vext.8 \r1, \r2, \r3, #1 @ src[-1]
+ vext.8 d1, \r2, \r3, #4 @ src[ 2]
+ vaddl.u8 q12, \r2, \r0
+ vaddl.u8 q13, \r1, d1
+ vmovl.u8 q10, d29
+ vmovl.u8 q11, d28
+ vmls.s16 q12, q13, \rc[0]
+ vmla.s16 q8, q10, \rc[1]
+ vext.8 d26, \r2, \r3, #2 @ src[ 0]
+ vext.8 d27, \r2, \r3, #3 @ src[ 1]
+ vmovl.u8 q14, d26
+ vmovl.u8 q10, d27
+ vmla.s16 q8, q11, \rc[2]
+ vmla.s16 q12, q14, \rc[1]
+ vmla.s16 q12, q10, \rc[2]
+ vqrshrun.s16 \r0, q8, #\shift
+ vqrshrun.s16 \r2, q12, #\shift
+.endm
+
+/* void avg|put_rv40_qpel8_h_lowpass_s<shift>(uint8_t *dst, uint8_t *src, int dstStride,
+                                              int srcStride, const int h, const int C1,
+                                              const int C2)
+   The rounding shift (5 or 6) is encoded in the function name. */
+.macro rv40_qpel8_h_lowpass type, shift
+function ff_\type\()_rv40_qpel8_h_lowpass_s\shift\()_neon, export=1
+ push {r4-r6, lr}
+        ldrd            r4,  r5,  [sp, #16]     @ r4 = h, r5 = C1
+ ldr r6, [sp, #24] @ r6 = C2
+.ifc \type,avg
+ mov lr, r0
+ pld [r0]
+.endif
+ pld [r1]
+ vmov.s16 d0, #5
+ vmov.s16 d0[1], r5
+ vmov.s16 d0[2], r6
+ sub r1, r1, #2 @ load 2 previous pixels
+        lsrs            r4,  r4,  #1            @ halve h; the loop filters 2 rows/iteration
+        bcs             2f                      @ h odd: filter a single row first
+1:
+ vld1.64 {q2}, [r1], r3 @ load 16 pixels
+ vld1.64 {q3}, [r1], r3 @ load 16 pixels
+ qpel_lowpass_x2 d4, d5, d6, d7, d0, \shift
+.ifc \type,avg
+ vld1.64 {d3}, [lr], r2
+        vld1.64         {d2}, [lr], r2          @ use d2, not d8: d8-d15 are callee-saved
+        vrhadd.u8       d4,  d4,  d3
+        vrhadd.u8       d6,  d6,  d2
+.endif
+ vst1.64 {d4}, [r0], r2
+ vst1.64 {d6}, [r0], r2
+ subs r4, r4, #1
+ bne 1b
+ pop {r4-r6, pc}
+2:
+ vld1.64 {q2}, [r1], r3 @ load 16 pixels
+ qpel_lowpass d4, d5, d0, \shift
+.ifc \type,avg
+ vld1.64 {d3}, [lr], r2
+ vrhadd.u8 d4, d4, d3
+.endif
+ vst1.64 {d4}, [r0], r2
+ b 1b
+endfunc
+.endm
+
+
+/* void avg|put_rv40_qpel8_v_lowpass_s<shift>(uint8_t *dst, uint8_t *src, int dstStride,
+                                              int srcStride, const int C1, const int C2) */
+.macro rv40_qpel8_v_lowpass type, shift
+function ff_\type\()_rv40_qpel8_v_lowpass_s\shift\()_neon, export=1
+ push {r4-r5, lr}
+ vpush {q4-q7}
+        ldrd            r4,  r5,  [sp, #76]     @ r4 = C1, r5 = C2
+.ifc \type,avg
+ mov lr, r0
+ pld [r0]
+.endif
+ pld [r1]
+ vmov.s16 d0, #5
+ vmov.s16 d0[1], r4
+ vmov.s16 d0[2], r5
+        sub             r1,  r1,  r3,  lsl #1   @ load 2 previous pixel rows
+1:
+ vld1.64 {d2}, [r1], r3 @ load 8 pixels
+ vld1.64 {d3}, [r1], r3 @ load 8 pixels
+ vld1.64 {d4}, [r1], r3 @ load 8 pixels
+ vld1.64 {d5}, [r1], r3 @ load 8 pixels
+ vld1.64 {d6}, [r1], r3 @ load 8 pixels
+ vld1.64 {d7}, [r1], r3 @ load 8 pixels
+ vld1.64 {d8}, [r1], r3 @ load 8 pixels
+ vld1.64 {d9}, [r1], r3 @ load 8 pixels
+ vld1.64 {d10}, [r1], r3 @ load 8 pixels
+ vld1.64 {d11}, [r1], r3 @ load 8 pixels
+ vld1.64 {d12}, [r1], r3 @ load 8 pixels
+ vld1.64 {d13}, [r1], r3 @ load 8 pixels
+ vld1.64 {d14}, [r1], r3 @ load 8 pixels
+ transpose_8x8 d2, d3, d4, d5, d6, d7, d8, d9
+ transpose_8x8 d10, d11, d12, d13, d14, d15, d30, d31
+ qpel_lowpass_x2 d2, d10, d3, d11, d0, \shift
+ qpel_lowpass_x2 d4, d12, d5, d13, d0, \shift
+ qpel_lowpass_x2 d6, d14, d7, d15, d0, \shift
+ qpel_lowpass_x2 d8, d30, d9, d31, d0, \shift
+ transpose_8x8 d2, d3, d4, d5, d6, d7, d8, d9
+.ifc \type,avg
+ vld1.64 d12, [lr], r2
+ vld1.64 d13, [lr], r2
+ vld1.64 d14, [lr], r2
+ vld1.64 d15, [lr], r2
+ vld1.64 d16, [lr], r2
+ vld1.64 d17, [lr], r2
+ vld1.64 d18, [lr], r2
+ vld1.64 d19, [lr], r2
+ vrhadd.u8 d2, d2, d12
+ vrhadd.u8 d3, d3, d13
+ vrhadd.u8 d4, d4, d14
+ vrhadd.u8 d5, d5, d15
+ vrhadd.u8 d6, d6, d16
+ vrhadd.u8 d7, d7, d17
+ vrhadd.u8 d8, d8, d18
+ vrhadd.u8 d9, d9, d19
+.endif
+ vst1.64 d2, [r0], r2
+ vst1.64 d3, [r0], r2
+ vst1.64 d4, [r0], r2
+ vst1.64 d5, [r0], r2
+ vst1.64 d6, [r0], r2
+ vst1.64 d7, [r0], r2
+ vst1.64 d8, [r0], r2
+ vst1.64 d9, [r0], r2
+ vpop {q4-q7}
+ pop {r4-r5, pc}
+endfunc
+.endm
+        .text
+        .align
+
+        rv40_qpel8_h_lowpass avg, 5
+        rv40_qpel8_h_lowpass put, 5
+        rv40_qpel8_h_lowpass avg, 6
+        rv40_qpel8_h_lowpass put, 6
+
+        rv40_qpel8_v_lowpass avg, 5
+        rv40_qpel8_v_lowpass put, 5
+        rv40_qpel8_v_lowpass avg, 6
+        rv40_qpel8_v_lowpass put, 6
+
const rv40bias
.short 0,16,32,16,32,28,32,28,0,32,16,32,32,28,32,28
--
1.7.7