[FFmpeg-cvslog] fate: disable fate-svq3-2
ffmpeg | branch: master | James Almer| Mon Oct 2 21:40:59 2017 -0300| [aa4fe27657462742943dfbd185a18c223ae4dca3] | committer: James Almer fate: disable fate-svq3-2 The first frame changes depending on --enable-memory-poisoning being used to configure ffmpeg or not, even if requesting bitexact decoding. Disable the test until this is fixed. Signed-off-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aa4fe27657462742943dfbd185a18c223ae4dca3 --- tests/fate/qt.mak | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/fate/qt.mak b/tests/fate/qt.mak index 2a7fc2e0fa..c054129f08 100644 --- a/tests/fate/qt.mak +++ b/tests/fate/qt.mak @@ -52,7 +52,8 @@ fate-svq1-headerswap: CMD = framecrc -i $(TARGET_SAMPLES)/svq1/ct_ending_cut.mov FATE_SVQ3 += fate-svq3-1 fate-svq3-1: CMD = framecrc -i $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an -FATE_SVQ3 += fate-svq3-2 +#FATE_SVQ3 += fate-svq3-2 +#FIXME: first frame changes depending on --enable-memory-poisoning being used to configure or not fate-svq3-2: CMD = framecrc -flags +bitexact -ignore_editlist 1 -i $(TARGET_SAMPLES)/svq3/svq3_decoding_regression.mov -an FATE_SVQ3 += fate-svq3-watermark ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/encode: free non-referenced packets' side data in the old encode API functions
ffmpeg | branch: master | James Almer| Mon Oct 2 18:58:39 2017 -0300| [712ee85816ef854761f30ea57ea628997bd62e60] | committer: James Almer avcodec/encode: free non-referenced packets' side data in the old encode API functions Fixes memleaks introduced by a22c6a4796ca1f2cbee6784262515da876fbec22. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=712ee85816ef854761f30ea57ea628997bd62e60 --- libavcodec/encode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavcodec/encode.c b/libavcodec/encode.c index dd50486bcf..c152228c92 100644 --- a/libavcodec/encode.c +++ b/libavcodec/encode.c @@ -227,6 +227,7 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx, ret = av_packet_ref(&tmp, avpkt); if (ret < 0) return ret; +av_packet_unref(avpkt); *avpkt = tmp; } } @@ -325,6 +326,7 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx, ret = av_packet_ref(&tmp, avpkt); if (ret < 0) return ret; +av_packet_unref(avpkt); *avpkt = tmp; } } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Merge commit '8e4d4efc67e154fdffd65964a7cfeef740320827'
ffmpeg | branch: master | James Almer| Mon Oct 2 18:23:48 2017 -0300| [b591329c3afe445c45eaecadd5fe3b80a837ee2f] | committer: James Almer Merge commit '8e4d4efc67e154fdffd65964a7cfeef740320827' * commit '8e4d4efc67e154fdffd65964a7cfeef740320827': fate: Add another SVQ3 test to increase coverage Also included a fix from da8093f712d625db7ce4a2526fb52994e01921ec. The demuxer option "-ignore_editlist 1 " is temporarily added to the test as well, to workaround a regression in the edit list mov parsing code. Merged-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b591329c3afe445c45eaecadd5fe3b80a837ee2f --- tests/fate/qt.mak | 12 ++-- tests/ref/fate/{svq3 => svq3-1} | 0 tests/ref/fate/svq3-2 | 24 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tests/fate/qt.mak b/tests/fate/qt.mak index 335ec44dc2..2a7fc2e0fa 100644 --- a/tests/fate/qt.mak +++ b/tests/fate/qt.mak @@ -49,10 +49,18 @@ fate-svq1: CMD = framecrc -i $(TARGET_SAMPLES)/svq1/marymary-shackles.mov -an -t FATE_QT-$(call DEMDEC, MOV, SVQ1) += fate-svq1-headerswap fate-svq1-headerswap: CMD = framecrc -i $(TARGET_SAMPLES)/svq1/ct_ending_cut.mov -frames 4 -FATE_QT-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += fate-svq3 fate-svq3-watermark -fate-svq3: CMD = framecrc -i $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an +FATE_SVQ3 += fate-svq3-1 +fate-svq3-1: CMD = framecrc -i $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an + +FATE_SVQ3 += fate-svq3-2 +fate-svq3-2: CMD = framecrc -flags +bitexact -ignore_editlist 1 -i $(TARGET_SAMPLES)/svq3/svq3_decoding_regression.mov -an + +FATE_SVQ3 += fate-svq3-watermark fate-svq3-watermark: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/svq3/svq3_watermark.mov +FATE_QT-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += $(FATE_SVQ3) +fate-svq3: $(FATE_SVQ3) + FATE_QT += $(FATE_QT-yes) FATE_SAMPLES_FFMPEG += $(FATE_QT) diff --git a/tests/ref/fate/svq3 b/tests/ref/fate/svq3-1 similarity index 100% rename from 
tests/ref/fate/svq3 rename to tests/ref/fate/svq3-1 diff --git a/tests/ref/fate/svq3-2 b/tests/ref/fate/svq3-2 new file mode 100644 index 00..1d89c05971 --- /dev/null +++ b/tests/ref/fate/svq3-2 @@ -0,0 +1,24 @@ +#tb 0: 1/24 +#media_type 0: video +#codec_id 0: rawvideo +#dimensions 0: 480x257 +#sar 0: 0/1 +0, 1, 1,1, 185280, 0x044209c4 +0, 2, 2,1, 185280, 0x427ef9a7 +0, 3, 3,1, 185280, 0x8f771cc8 +0, 4, 4,1, 185280, 0xb40d0e52 +0, 5, 5,1, 185280, 0x2e6ee461 +0, 6, 6,1, 185280, 0x681ba513 +0, 7, 7,1, 185280, 0x998c5676 +0, 8, 8,1, 185280, 0xf91003ec +0, 9, 9,1, 185280, 0x322ed3de +0, 10, 10,1, 185280, 0xb1c9370a +0, 11, 11,1, 185280, 0x41423b36 +0, 12, 12,1, 185280, 0x0b9284e4 +0, 13, 13,1, 185280, 0x185789b2 +0, 14, 14,1, 185280, 0x8f0bece0 +0, 15, 15,1, 185280, 0xfcd9450e +0, 16, 16,1, 185280, 0x509d868b +0, 17, 17,1, 185280, 0x71fd9ae3 +0, 18, 18,1, 185280, 0x3dad1b3c +0, 19, 19,1, 185280, 0x69ba37dd == diff --cc tests/fate/qt.mak index 335ec44dc2,761db8d234..2a7fc2e0fa --- a/tests/fate/qt.mak +++ b/tests/fate/qt.mak @@@ -7,53 -7,53 +7,61 @@@ fate-qdm2: CMP = oneof fate-qdm2: REF = $(SAMPLES)/qt-surge-suite/surge-2-16-B-QDM2.pcm fate-qdm2: FUZZ = 2 -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-mono +FATE_QT-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-mono fate-qt-alaw-mono: CMD = md5 -i $(TARGET_SAMPLES)/qt-surge-suite/surge-1-16-B-alaw.mov -f s16le -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-stereo +FATE_QT-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-stereo fate-qt-alaw-stereo: CMD = md5 -i $(TARGET_SAMPLES)/qt-surge-suite/surge-2-16-B-alaw.mov -f s16le -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, ADPCM_IMA_QT) += fate-qt-ima4-mono +FATE_QT-$(call DEMDEC, MOV, ADPCM_IMA_QT) += fate-qt-ima4-mono fate-qt-ima4-mono: CMD = md5 -i $(TARGET_SAMPLES)/qt-surge-suite/surge-1-16-B-ima4.mov -f s16le -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, ADPCM_IMA_QT) += fate-qt-ima4-stereo +FATE_QT-$(call DEMDEC, MOV, ADPCM_IMA_QT) += 
fate-qt-ima4-stereo fate-qt-ima4-stereo: CMD = md5 -i $(TARGET_SAMPLES)/qt-surge-suite/surge-2-16-B-ima4.mov -f s16le -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, MACE3) += fate-qt-mac3-mono +FATE_QT-$(call DEMDEC, MOV, MACE3) += fate-qt-mac3-mono
[FFmpeg-cvslog] fate: Add another SVQ3 test to increase coverage
ffmpeg | branch: master | Diego Biurrun| Sat Apr 6 12:48:32 2013 +0200| [8e4d4efc67e154fdffd65964a7cfeef740320827] | committer: Diego Biurrun fate: Add another SVQ3 test to increase coverage > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8e4d4efc67e154fdffd65964a7cfeef740320827 --- tests/fate/qt.mak | 10 -- tests/ref/fate/{svq3 => svq3-1} | 0 tests/ref/fate/svq3-2 | 20 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tests/fate/qt.mak b/tests/fate/qt.mak index 97537f93ef..761db8d234 100644 --- a/tests/fate/qt.mak +++ b/tests/fate/qt.mak @@ -49,5 +49,11 @@ fate-svq1: CMD = framecrc -i $(TARGET_SAMPLES)/svq1/marymary-shackles.mov -an -t FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, SVQ1) += fate-svq1-headerswap fate-svq1-headerswap: CMD = framecrc -i $(TARGET_SAMPLES)/svq1/ct_ending_cut.mov -frames 4 -FATE_SAMPLES_AVCONV-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += fate-svq3 -fate-svq3: CMD = framecrc -i $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an +FATE_SVQ3 += fate-svq3-1 +fate-svq3-1: CMD = framecrc -i $(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an + +FATE_SVQ3 += fate-svq3-2 +fate-svq3-2: CMD = framecrc -i $(TARGET_SAMPLES)/svq3/svq3_decoding_regression.mov -an + +FATE_SAMPLES_AVCONV-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += $(FATE_SVQ3) +fate-svq3: $(FATE_SVQ3) diff --git a/tests/ref/fate/svq3 b/tests/ref/fate/svq3-1 similarity index 100% rename from tests/ref/fate/svq3 rename to tests/ref/fate/svq3-1 diff --git a/tests/ref/fate/svq3-2 b/tests/ref/fate/svq3-2 new file mode 100644 index 00..7e69b31b01 --- /dev/null +++ b/tests/ref/fate/svq3-2 @@ -0,0 +1,20 @@ +#tb 0: 1/19200 +0, -2, -2,0, 185280, 0x061c0d85 +0, -19200, -19200,0, 185280, 0x427ef9a7 +0, -18400, -18400,0, 185280, 0x8f495d37 +0, -17600, -17600,0, 185280, 0x5bfd0e5b +0, -16800, -16800,0, 185280, 0x60d12d25 +0, -16000, -16000,0, 185280, 0x25aaa51b +0, -15200, -15200,0, 185280, 0x9cf58bf0 +0, -14400, -14400,0, 185280, 0xd9bd03ea +0, -13600, 
-13600,0, 185280, 0xd18be732 +0, -12800, -12800,0, 185280, 0x92763708 +0, -12000, -12000,0, 185280, 0x94b5784a +0, -11200, -11200,0, 185280, 0x32b184c9 +0, -10400, -10400,0, 185280, 0xe316fec3 +0, -9600, -9600,0, 185280, 0x6344ec88 +0, -8800, -8800,0, 185280, 0xe0aa6de4 +0, -8000, -8000,0, 185280, 0x6cfc8687 +0, -7200, -7200,0, 185280, 0x26ddc189 +0, -6400, -6400,0, 185280, 0x5a0c1b38 +0, -5600, -5600,0, 185280, 0x79a88cb9 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: vp9itxfm: Reorder iadst16 coeffs
ffmpeg | branch: master | Martin Storsjö| Sat Dec 31 22:27:13 2016 +0200| [08074c092d8c97d71c5986e5325e97ffc956119d] | committer: Martin Storsjö arm: vp9itxfm: Reorder iadst16 coeffs This matches the order they are in the 16 bpp version. There they are in this order, to make sure we access them in the same order they are declared, easing loading only half of the coefficients at a time. This makes the 8 bpp version match the 16 bpp version better. Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=08074c092d8c97d71c5986e5325e97ffc956119d --- libavcodec/arm/vp9itxfm_neon.S | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S index 1d4d6a7910..a612b25f4f 100644 --- a/libavcodec/arm/vp9itxfm_neon.S +++ b/libavcodec/arm/vp9itxfm_neon.S @@ -37,8 +37,8 @@ idct_coeffs: endconst const iadst16_coeffs, align=4 -.short 16364, 804, 15893, 3981, 14811, 7005, 13160, 9760 -.short 11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207 +.short 16364, 804, 15893, 3981, 11003, 12140, 8423, 14053 +.short 14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207 endconst @ Do four 4x4 transposes, using q registers for the subtransposes that don't @@ -678,19 +678,19 @@ function iadst16 vld1.16 {q0-q1}, [r12,:128] mbutterfly_lq3, q2, d31, d16, d0[1], d0[0] @ q3 = t1, q2 = t0 -mbutterfly_lq5, q4, d23, d24, d2[1], d2[0] @ q5 = t9, q4 = t8 +mbutterfly_lq5, q4, d23, d24, d1[1], d1[0] @ q5 = t9, q4 = t8 butterfly_n d31, d24, q3, q5, q6, q5 @ d31 = t1a, d24 = t9a mbutterfly_lq7, q6, d29, d18, d0[3], d0[2] @ q7 = t3, q6 = t2 butterfly_n d16, d23, q2, q4, q3, q4 @ d16 = t0a, d23 = t8a -mbutterfly_lq3, q2, d21, d26, d2[3], d2[2] @ q3 = t11, q2 = t10 +mbutterfly_lq3, q2, d21, d26, d1[3], d1[2] @ q3 = t11, q2 = t10 butterfly_n d29, d26, q7, q3, q4, q3 @ d29 = t3a, d26 = t11a -mbutterfly_lq5, q4, d27, d20, d1[1], d1[0] @ q5 = t5, q4 = t4 +mbutterfly_lq5, q4, d27, d20, d2[1], d2[0] @ q5 = t5, 
q4 = t4 butterfly_n d18, d21, q6, q2, q3, q2 @ d18 = t2a, d21 = t10a mbutterfly_lq7, q6, d19, d28, d3[1], d3[0] @ q7 = t13, q6 = t12 butterfly_n d20, d28, q5, q7, q2, q7 @ d20 = t5a, d28 = t13a -mbutterfly_lq3, q2, d25, d22, d1[3], d1[2] @ q3 = t7, q2 = t6 +mbutterfly_lq3, q2, d25, d22, d2[3], d2[2] @ q3 = t7, q2 = t6 butterfly_n d27, d19, q4, q6, q5, q6 @ d27 = t4a, d19 = t12a mbutterfly_lq5, q4, d17, d30, d3[3], d3[2] @ q5 = t15, q4 = t14 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Merge commit 'b8f66c0838b4c645227f23a35b4d54373da4c60a'
ffmpeg | branch: master | James Almer| Mon Oct 2 16:13:01 2017 -0300| [9f5d238a66eab835a3137fbf014b7d85f3172cd7] | committer: James Almer Merge commit 'b8f66c0838b4c645227f23a35b4d54373da4c60a' * commit 'b8f66c0838b4c645227f23a35b4d54373da4c60a': aarch64: vp9itxfm: Reorder iadst16 coeffs arm: vp9itxfm: Reorder iadst16 coeffs aarch64: vp9itxfm: Reorder the idct coefficients for better pairing arm: vp9itxfm: Reorder the idct coefficients for better pairing aarch64: vp9itxfm: Avoid reloading the idct32 coefficients arm: vp9itxfm: Avoid reloading the idct32 coefficients arm: vp9lpf: Implement the mix2_44 function with one single filter pass aarch64: vp9lpf: Use dup+rev16+uzp1 instead of dup+lsr+dup+trn1 arm/aarch64: vp9lpf: Keep the comparison to E within 8 bit This commit is a noop, see 3fbbad29847c79f422128ad88f174c53a5f6c449 f32690a298badbf2df66319e9b38236ad3d3e321 a88db8b9a016fe47997029e3653cdac4777994b4 600f4c9b03b8d39b986a00dd9dafa61be7d86a72 2905657b902fea8718434f0d29056cf4e7434307 4f693b56bdcfda37b4f2c48b39dcf12439c149c8 f952273019984da5e7bfa1298e1cdb0683049296 b2e20d89844b51c3d9565b293606d1433bd67f25 26ee83acc4ebd765529b666c7f050243b7677d76 Merged-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9f5d238a66eab835a3137fbf014b7d85f3172cd7 --- ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] aarch64: vp9itxfm: Reorder the idct coefficients for better pairing
ffmpeg | branch: master | Martin Storsjö| Sat Dec 31 14:18:31 2016 +0200| [09eb88a12e008d10a3f7a6be75d18ad98b368e68] | committer: Martin Storsjö aarch64: vp9itxfm: Reorder the idct coefficients for better pairing All elements are used pairwise, except for the first one. Previously, the 16th element was unused. Move the unused element to the second slot, to make the later element pairs not split across registers. This simplifies loading only parts of the coefficients, reducing the difference to the 16 bpp version. Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=09eb88a12e008d10a3f7a6be75d18ad98b368e68 --- libavcodec/aarch64/vp9itxfm_neon.S | 124 ++--- 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S index b6c2575236..d4fc2163aa 100644 --- a/libavcodec/aarch64/vp9itxfm_neon.S +++ b/libavcodec/aarch64/vp9itxfm_neon.S @@ -22,7 +22,7 @@ #include "neon.S" const itxfm4_coeffs, align=4 -.short 11585, 6270, 15137, 0 +.short 11585, 0, 6270, 15137 iadst4_coeffs: .short 5283, 15212, 9929, 13377 endconst @@ -30,8 +30,8 @@ endconst const iadst8_coeffs, align=4 .short 16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679 idct_coeffs: -.short 11585, 6270, 15137, 3196, 16069, 13623, 9102, 1606 -.short 16305, 12665, 10394, 7723, 14449, 15679, 4756, 0 +.short 11585, 0, 6270, 15137, 3196, 16069, 13623, 9102 +.short 1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756 .short 804, 16364, 12140, 11003, 7005, 14811, 15426, 5520 .short 3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404 endconst @@ -192,14 +192,14 @@ endconst .endm .macro idct4 c0, c1, c2, c3 -smull v22.4s,\c1\().4h, v0.h[2] -smull v20.4s,\c1\().4h, v0.h[1] +smull v22.4s,\c1\().4h, v0.h[3] +smull v20.4s,\c1\().4h, v0.h[2] add v16.4h,\c0\().4h, \c2\().4h sub v17.4h,\c0\().4h, \c2\().4h -smlal v22.4s,\c3\().4h, v0.h[1] +smlal v22.4s,\c3\().4h, v0.h[2] smull v18.4s,v16.4h,v0.h[0] smull 
v19.4s,v17.4h,v0.h[0] -smlsl v20.4s,\c3\().4h, v0.h[2] +smlsl v20.4s,\c3\().4h, v0.h[3] rshrn v22.4h,v22.4s,#14 rshrn v18.4h,v18.4s,#14 rshrn v19.4h,v19.4s,#14 @@ -326,9 +326,9 @@ itxfm_func4x4 iwht, iwht .macro idct8 dmbutterfly0v16, v20, v16, v20, v2, v3, v4, v5, v6, v7 // v16 = t0a, v20 = t1a -dmbutterfly v18, v22, v0.h[1], v0.h[2], v2, v3, v4, v5 // v18 = t2a, v22 = t3a -dmbutterfly v17, v23, v0.h[3], v0.h[4], v2, v3, v4, v5 // v17 = t4a, v23 = t7a -dmbutterfly v21, v19, v0.h[5], v0.h[6], v2, v3, v4, v5 // v21 = t5a, v19 = t6a +dmbutterfly v18, v22, v0.h[2], v0.h[3], v2, v3, v4, v5 // v18 = t2a, v22 = t3a +dmbutterfly v17, v23, v0.h[4], v0.h[5], v2, v3, v4, v5 // v17 = t4a, v23 = t7a +dmbutterfly v21, v19, v0.h[6], v0.h[7], v2, v3, v4, v5 // v21 = t5a, v19 = t6a butterfly_8hv24, v25, v16, v22 // v24 = t0, v25 = t3 butterfly_8hv28, v29, v17, v21 // v28 = t4, v29 = t5a @@ -361,8 +361,8 @@ itxfm_func4x4 iwht, iwht dmbutterfly0v19, v20, v6, v7, v24, v26, v27, v28, v29, v30 // v19 = -out[3], v20 = out[4] neg v19.8h, v19.8h // v19 = out[3] -dmbutterfly_l v26, v27, v28, v29, v5, v3, v0.h[1], v0.h[2] // v26,v27 = t5a, v28,v29 = t4a -dmbutterfly_l v2, v3, v4, v5, v31, v25, v0.h[2], v0.h[1] // v2,v3 = t6a, v4,v5 = t7a +dmbutterfly_l v26, v27, v28, v29, v5, v3, v0.h[2], v0.h[3] // v26,v27 = t5a, v28,v29 = t4a +dmbutterfly_l v2, v3, v4, v5, v31, v25, v0.h[3], v0.h[2] // v2,v3 = t6a, v4,v5 = t7a dbutterfly_nv17, v30, v28, v29, v2, v3, v6, v7, v24, v25 // v17 = -out[1], v30 = t6 dbutterfly_nv22, v31, v26, v27, v4, v5, v6, v7, v24, v25 // v22 = out[6], v31 = t7 @@ -543,13 +543,13 @@ endfunc function idct16 dmbutterfly0v16, v24, v16, v24, v2, v3, v4, v5, v6, v7 // v16 = t0a, v24 = t1a -dmbutterfly v20, v28, v0.h[1], v0.h[2], v2, v3, v4, v5 // v20 = t2a, v28 = t3a -dmbutterfly v18, v30, v0.h[3], v0.h[4], v2, v3, v4, v5 // v18 = t4a, v30 = t7a -dmbutterfly v26, v22, v0.h[5], v0.h[6], v2, v3, v4, v5 // v26 = t5a, v22 = t6a -dmbutterfly v17, v31, v0.h[7], v1.h[0], v2, v3, v4, v5 
// v17 = t8a, v31 = t15a -dmbutterfly v25, v23, v1.h[1], v1.h[2], v2, v3, v4, v5 // v25 = t9a, v23 =
[FFmpeg-cvslog] aarch64: vp9itxfm: Reorder iadst16 coeffs
ffmpeg | branch: master | Martin Storsjö| Sat Dec 31 22:27:13 2016 +0200| [b8f66c0838b4c645227f23a35b4d54373da4c60a] | committer: Martin Storsjö aarch64: vp9itxfm: Reorder iadst16 coeffs This matches the order they are in the 16 bpp version. There they are in this order, to make sure we access them in the same order they are declared, easing loading only half of the coefficients at a time. This makes the 8 bpp version match the 16 bpp version better. Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b8f66c0838b4c645227f23a35b4d54373da4c60a --- libavcodec/aarch64/vp9itxfm_neon.S | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S index d4fc2163aa..93dc736f01 100644 --- a/libavcodec/aarch64/vp9itxfm_neon.S +++ b/libavcodec/aarch64/vp9itxfm_neon.S @@ -37,8 +37,8 @@ idct_coeffs: endconst const iadst16_coeffs, align=4 -.short 16364, 804, 15893, 3981, 14811, 7005, 13160, 9760 -.short 11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207 +.short 16364, 804, 15893, 3981, 11003, 12140, 8423, 14053 +.short 14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207 endconst // out1 = ((in1 + in2) * v0[0] + (1 << 13)) >> 14 @@ -628,19 +628,19 @@ function iadst16 ld1 {v0.8h,v1.8h}, [x11] dmbutterfly_l v6, v7, v4, v5, v31, v16, v0.h[1], v0.h[0] // v6,v7 = t1, v4,v5 = t0 -dmbutterfly_l v10, v11, v8, v9, v23, v24, v1.h[1], v1.h[0] // v10,v11 = t9, v8,v9 = t8 +dmbutterfly_l v10, v11, v8, v9, v23, v24, v0.h[5], v0.h[4] // v10,v11 = t9, v8,v9 = t8 dbutterfly_nv31, v24, v6, v7, v10, v11, v12, v13, v10, v11 // v31 = t1a, v24 = t9a dmbutterfly_l v14, v15, v12, v13, v29, v18, v0.h[3], v0.h[2] // v14,v15 = t3, v12,v13 = t2 dbutterfly_nv16, v23, v4, v5, v8, v9, v6, v7, v8, v9 // v16 = t0a, v23 = t8a -dmbutterfly_l v6, v7, v4, v5, v21, v26, v1.h[3], v1.h[2] // v6,v7 = t11, v4,v5 = t10 +dmbutterfly_l v6, v7, v4, v5, v21, v26, v0.h[7], v0.h[6] // v6,v7 = t11, v4,v5 = t10 
dbutterfly_nv29, v26, v14, v15, v6, v7, v8, v9, v6, v7 // v29 = t3a, v26 = t11a -dmbutterfly_l v10, v11, v8, v9, v27, v20, v0.h[5], v0.h[4] // v10,v11 = t5, v8,v9 = t4 +dmbutterfly_l v10, v11, v8, v9, v27, v20, v1.h[1], v1.h[0] // v10,v11 = t5, v8,v9 = t4 dbutterfly_nv18, v21, v12, v13, v4, v5, v6, v7, v4, v5 // v18 = t2a, v21 = t10a dmbutterfly_l v14, v15, v12, v13, v19, v28, v1.h[5], v1.h[4] // v14,v15 = t13, v12,v13 = t12 dbutterfly_nv20, v28, v10, v11, v14, v15, v4, v5, v14, v15 // v20 = t5a, v28 = t13a -dmbutterfly_l v6, v7, v4, v5, v25, v22, v0.h[7], v0.h[6] // v6,v7 = t7, v4,v5 = t6 +dmbutterfly_l v6, v7, v4, v5, v25, v22, v1.h[3], v1.h[2] // v6,v7 = t7, v4,v5 = t6 dbutterfly_nv27, v19, v8, v9, v12, v13, v10, v11, v12, v13 // v27 = t4a, v19 = t12a dmbutterfly_l v10, v11, v8, v9, v17, v30, v1.h[7], v1.h[6] // v10,v11 = t15, v8,v9 = t14 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] arm: vp9itxfm: Reorder the idct coefficients for better pairing
ffmpeg | branch: master | Martin Storsjö| Sat Dec 31 14:05:44 2016 +0200| [de06bdfe6c8abd8266d5c6f5c68e4df0060b61fc] | committer: Martin Storsjö arm: vp9itxfm: Reorder the idct coefficients for better pairing All elements are used pairwise, except for the first one. Previously, the 16th element was unused. Move the unused element to the second slot, to make the later element pairs not split across registers. This simplifies loading only parts of the coefficients, reducing the difference to the 16 bpp version. Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=de06bdfe6c8abd8266d5c6f5c68e4df0060b61fc --- libavcodec/arm/vp9itxfm_neon.S | 124 - 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S index bed502eba2..1d4d6a7910 100644 --- a/libavcodec/arm/vp9itxfm_neon.S +++ b/libavcodec/arm/vp9itxfm_neon.S @@ -22,7 +22,7 @@ #include "neon.S" const itxfm4_coeffs, align=4 -.short 11585, 6270, 15137, 0 +.short 11585, 0, 6270, 15137 iadst4_coeffs: .short 5283, 15212, 9929, 13377 endconst @@ -30,8 +30,8 @@ endconst const iadst8_coeffs, align=4 .short 16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679 idct_coeffs: -.short 11585, 6270, 15137, 3196, 16069, 13623, 9102, 1606 -.short 16305, 12665, 10394, 7723, 14449, 15679, 4756, 0 +.short 11585, 0, 6270, 15137, 3196, 16069, 13623, 9102 +.short 1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756 .short 804, 16364, 12140, 11003, 7005, 14811, 15426, 5520 .short 3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404 endconst @@ -224,14 +224,14 @@ endconst .endm .macro idct4 c0, c1, c2, c3 -vmull.s16 q13, \c1, d0[2] -vmull.s16 q11, \c1, d0[1] +vmull.s16 q13, \c1, d0[3] +vmull.s16 q11, \c1, d0[2] vadd.i16d16, \c0, \c2 vsub.i16d17, \c0, \c2 -vmlal.s16 q13, \c3, d0[1] +vmlal.s16 q13, \c3, d0[2] vmull.s16 q9, d16, d0[0] vmull.s16 q10, d17, d0[0] -vmlsl.s16 q11, \c3, d0[2] +vmlsl.s16 q11, \c3, d0[3] vrshrn.s32 d26, q13, #14 
vrshrn.s32 d18, q9, #14 vrshrn.s32 d20, q10, #14 @@ -350,9 +350,9 @@ itxfm_func4x4 iwht, iwht .macro idct8 dmbutterfly0d16, d17, d24, d25, q8, q12, q2, q4, d4, d5, d8, d9, q3, q2, q5, q4 @ q8 = t0a, q12 = t1a -dmbutterfly d20, d21, d28, d29, d0[1], d0[2], q2, q3, q4, q5 @ q10 = t2a, q14 = t3a -dmbutterfly d18, d19, d30, d31, d0[3], d1[0], q2, q3, q4, q5 @ q9 = t4a, q15 = t7a -dmbutterfly d26, d27, d22, d23, d1[1], d1[2], q2, q3, q4, q5 @ q13 = t5a, q11 = t6a +dmbutterfly d20, d21, d28, d29, d0[2], d0[3], q2, q3, q4, q5 @ q10 = t2a, q14 = t3a +dmbutterfly d18, d19, d30, d31, d1[0], d1[1], q2, q3, q4, q5 @ q9 = t4a, q15 = t7a +dmbutterfly d26, d27, d22, d23, d1[2], d1[3], q2, q3, q4, q5 @ q13 = t5a, q11 = t6a butterfly q2, q14, q8, q14 @ q2 = t0, q14 = t3 butterfly q3, q10, q12, q10 @ q3 = t1, q10 = t2 @@ -386,8 +386,8 @@ itxfm_func4x4 iwht, iwht vneg.s16q15, q15 @ q15 = out[7] butterfly q8, q9, q11, q9 @ q8 = out[0], q9 = t2 -dmbutterfly_l q10, q11, q5, q7, d4, d5, d6, d7, d0[1], d0[2] @ q10,q11 = t5a, q5,q7 = t4a -dmbutterfly_l q2, q3, q13, q14, d12, d13, d8, d9, d0[2], d0[1] @ q2,q3 = t6a, q13,q14 = t7a +dmbutterfly_l q10, q11, q5, q7, d4, d5, d6, d7, d0[2], d0[3] @ q10,q11 = t5a, q5,q7 = t4a +dmbutterfly_l q2, q3, q13, q14, d12, d13, d8, d9, d0[3], d0[2] @ q2,q3 = t6a, q13,q14 = t7a dbutterfly_nd28, d29, d8, d9, q10, q11, q13, q14, q4, q6, q10, q11 @ q14 = out[6], q4 = t7 @@ -594,13 +594,13 @@ endfunc function idct16 mbutterfly0 d16, d24, d16, d24, d4, d6, q2, q3 @ d16 = t0a, d24 = t1a -mbutterfly d20, d28, d0[1], d0[2], q2, q3 @ d20 = t2a, d28 = t3a -mbutterfly d18, d30, d0[3], d1[0], q2, q3 @ d18 = t4a, d30 = t7a -mbutterfly d26, d22, d1[1], d1[2], q2, q3 @ d26 = t5a, d22 = t6a -mbutterfly d17, d31, d1[3], d2[0], q2, q3 @ d17 = t8a, d31 = t15a -mbutterfly d25, d23, d2[1], d2[2], q2, q3 @ d25 = t9a, d23 = t14a -mbutterfly d21, d27, d2[3], d3[0], q2, q3 @ d21 = t10a, d27 = t13a -mbutterfly d29, d19, d3[1], d3[2], q2, q3 @ d29 = t11a, d19 = t12a +mbutterfly d20, 
d28, d0[2], d0[3], q2, q3 @ d20 = t2a, d28 = t3a +mbutterfly d18, d30, d1[0], d1[1], q2, q3 @ d18 = t4a,
[FFmpeg-cvslog] arm: vp9itxfm: Avoid reloading the idct32 coefficients
ffmpeg | branch: master | Martin Storsjö| Mon Jan 2 22:50:38 2017 +0200| [402546a17233a8815307df9e14ff88cd70424537] | committer: Martin Storsjö arm: vp9itxfm: Avoid reloading the idct32 coefficients The idct32x32 function actually pushed q4-q7 onto the stack even though it didn't clobber them; there are plenty of registers that can be used to allow keeping all the idct coefficients in registers without having to reload different subsets of them at different stages in the transform. Since the idct16 core transform avoids clobbering q4-q7 (but clobbers q2-q3 instead, to avoid needing to back up and restore q4-q7 at all in the idct16 function), and the lanewise vmul needs a register in the q0-q3 range, we move the stored coefficients from q2-q3 into q4-q5 while doing idct16. While keeping these coefficients in registers, we still can skip pushing q7. Before: Cortex A7 A8 A9 A53 vp9_inv_dct_dct_32x32_sub32_add_neon: 18553.8 17182.7 14303.3 12089.7 After: vp9_inv_dct_dct_32x32_sub32_add_neon: 18470.3 16717.7 14173.6 11860.8 Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=402546a17233a8815307df9e14ff88cd70424537 --- libavcodec/arm/vp9itxfm_neon.S | 246 - 1 file changed, 120 insertions(+), 126 deletions(-) diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S index 8dc4bbfa55..bed502eba2 100644 --- a/libavcodec/arm/vp9itxfm_neon.S +++ b/libavcodec/arm/vp9itxfm_neon.S @@ -1185,58 +1185,51 @@ function idct32x32_dc_add_neon endfunc .macro idct32_end -butterfly d16, d5, d4, d5 @ d16 = t16a, d5 = t19a +butterfly d16, d9, d8, d9 @ d16 = t16a, d9 = t19a butterfly d17, d20, d23, d20 @ d17 = t17, d20 = t18 -butterfly d18, d6, d7, d6 @ d18 = t23a, d6 = t20a +butterfly d18, d10, d11, d10 @ d18 = t23a, d10 = t20a butterfly d19, d21, d22, d21 @ d19 = t22, d21 = t21 -butterfly d4, d28, d28, d30 @ d4 = t24a, d28 = t27a +butterfly d8, d28, d28, d30 @ d8 = t24a, d28 = t27a butterfly d23, d26, d25, d26 @ d23 = t25, d26 = t26 
-butterfly d7, d29, d29, d31 @ d7 = t31a, d29 = t28a +butterfly d11, d29, d29, d31 @ d11 = t31a, d29 = t28a butterfly d22, d27, d24, d27 @ d22 = t30, d27 = t29 mbutterfly d27, d20, d0[1], d0[2], q12, q15@ d27 = t18a, d20 = t29a -mbutterfly d29, d5, d0[1], d0[2], q12, q15@ d29 = t19, d5 = t28 -mbutterfly d28, d6, d0[1], d0[2], q12, q15, neg=1 @ d28 = t27, d6 = t20 +mbutterfly d29, d9, d0[1], d0[2], q12, q15@ d29 = t19, d9 = t28 +mbutterfly d28, d10, d0[1], d0[2], q12, q15, neg=1 @ d28 = t27, d10 = t20 mbutterfly d26, d21, d0[1], d0[2], q12, q15, neg=1 @ d26 = t26a, d21 = t21a -butterfly d31, d24, d7, d4 @ d31 = t31, d24 = t24 +butterfly d31, d24, d11, d8 @ d31 = t31, d24 = t24 butterfly d30, d25, d22, d23 @ d30 = t30a, d25 = t25a butterfly_r d23, d16, d16, d18 @ d23 = t23, d16 = t16 butterfly_r d22, d17, d17, d19 @ d22 = t22a, d17 = t17a butterfly d18, d21, d27, d21 @ d18 = t18, d21 = t21 -butterfly_r d27, d28, d5, d28 @ d27 = t27a, d28 = t28a -butterfly d4, d26, d20, d26 @ d4 = t29, d26 = t26 -butterfly d19, d20, d29, d6 @ d19 = t19a, d20 = t20 -vmovd29, d4@ d29 = t29 - -mbutterfly0 d27, d20, d27, d20, d4, d6, q2, q3 @ d27 = t27, d20 = t20 -mbutterfly0 d26, d21, d26, d21, d4, d6, q2, q3 @ d26 = t26a, d21 = t21a -mbutterfly0 d25, d22, d25, d22, d4, d6, q2, q3 @ d25 = t25, d22 = t22 -mbutterfly0 d24, d23, d24, d23, d4, d6, q2, q3 @ d24 = t24a, d23 = t23a +butterfly_r d27, d28, d9, d28 @ d27 = t27a, d28 = t28a +butterfly d8, d26, d20, d26 @ d8 = t29, d26 = t26 +butterfly d19, d20, d29, d10 @ d19 = t19a, d20 = t20 +vmovd29, d8@ d29 = t29 + +mbutterfly0 d27, d20, d27, d20, d8, d10, q4, q5 @ d27 = t27, d20 = t20 +mbutterfly0 d26, d21, d26, d21, d8, d10, q4, q5 @ d26 = t26a, d21 = t21a +mbutterfly0 d25, d22, d25, d22, d8, d10, q4, q5 @ d25 = t25, d22 = t22 +mbutterfly0 d24, d23, d24, d23, d8, d10, q4, q5 @ d24 = t24a, d23 = t23a bx lr .endm function idct32_odd -movrel r12, idct_coeffs -add r12, r12, #32 -vld1.16 {q0-q1}, [r12,:128] - -mbutterfly d16, d31, d0[0], d0[1], 
q2, q3 @ d16 = t16a, d31 = t31a -mbutterfly d24, d23, d0[2], d0[3], q2, q3 @
[FFmpeg-cvslog] aarch64: vp9itxfm: Avoid reloading the idct32 coefficients
ffmpeg | branch: master | Martin Storsjö| Mon Jan 2 22:08:41 2017 +0200| [65aa002d54433154a6924dc13e498bec98451ad0] | committer: Martin Storsjö aarch64: vp9itxfm: Avoid reloading the idct32 coefficients The idct32x32 function actually pushed d8-d15 onto the stack even though it didn't clobber them; there are plenty of registers that can be used to allow keeping all the idct coefficients in registers without having to reload different subsets of them at different stages in the transform. After this, we still can skip pushing d12-d15. Before: vp9_inv_dct_dct_32x32_sub32_add_neon: 8128.3 After: vp9_inv_dct_dct_32x32_sub32_add_neon: 8053.3 Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=65aa002d54433154a6924dc13e498bec98451ad0 --- libavcodec/aarch64/vp9itxfm_neon.S | 110 +++-- 1 file changed, 43 insertions(+), 67 deletions(-) diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S index d35f103a79..b6c2575236 100644 --- a/libavcodec/aarch64/vp9itxfm_neon.S +++ b/libavcodec/aarch64/vp9itxfm_neon.S @@ -1123,18 +1123,14 @@ endfunc .endm function idct32_odd -ld1 {v0.8h,v1.8h}, [x11] - -dmbutterfly v16, v31, v0.h[0], v0.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a -dmbutterfly v24, v23, v0.h[2], v0.h[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a -dmbutterfly v20, v27, v0.h[4], v0.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a -dmbutterfly v28, v19, v0.h[6], v0.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a -dmbutterfly v18, v29, v1.h[0], v1.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a -dmbutterfly v26, v21, v1.h[2], v1.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a -dmbutterfly v22, v25, v1.h[4], v1.h[5], v4, v5, v6, v7 // v22 = t22a, v25 = t25a -dmbutterfly v30, v17, v1.h[6], v1.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a - -ld1 {v0.8h}, [x10] +dmbutterfly v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a +dmbutterfly v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = t17a, v23 
= t30a +dmbutterfly v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a +dmbutterfly v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a +dmbutterfly v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a +dmbutterfly v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a +dmbutterfly v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = t22a, v25 = t25a +dmbutterfly v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a butterfly_8hv4, v24, v16, v24 // v4 = t16, v24 = t17 butterfly_8hv5, v20, v28, v20 // v5 = t19, v20 = t18 @@ -1153,18 +1149,14 @@ function idct32_odd endfunc function idct32_odd_half -ld1 {v0.8h,v1.8h}, [x11] - -dmbutterfly_h1 v16, v31, v0.h[0], v0.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a -dmbutterfly_h2 v24, v23, v0.h[2], v0.h[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a -dmbutterfly_h1 v20, v27, v0.h[4], v0.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a -dmbutterfly_h2 v28, v19, v0.h[6], v0.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a -dmbutterfly_h1 v18, v29, v1.h[0], v1.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a -dmbutterfly_h2 v26, v21, v1.h[2], v1.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a -dmbutterfly_h1 v22, v25, v1.h[4], v1.h[5], v4, v5, v6, v7 // v22 = t22a, v25 = t25a -dmbutterfly_h2 v30, v17, v1.h[6], v1.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a - -ld1 {v0.8h}, [x10] +dmbutterfly_h1 v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a +dmbutterfly_h2 v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a +dmbutterfly_h1 v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a +dmbutterfly_h2 v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a +dmbutterfly_h1 v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a +dmbutterfly_h2 v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a +dmbutterfly_h1 v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = t22a, v25 
= t25a +dmbutterfly_h2 v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a butterfly_8hv4, v24, v16, v24 // v4 = t16, v24 = t17 butterfly_8hv5, v20, v28, v20 // v5 = t19, v20 = t18 @@ -1183,18 +1175,14 @@ function idct32_odd_half endfunc function idct32_odd_quarter -ld1 {v0.8h,v1.8h}, [x11] - -dsmull_hv4, v5, v16, v0.h[0] -
[FFmpeg-cvslog] arm: vp9lpf: Implement the mix2_44 function with one single filter pass
ffmpeg | branch: master | Martin Storsjö| Sat Jan 14 13:22:30 2017 +0200| [575e31e931e4178e9f1e24407503c9b4ec0ef9ba] | committer: Martin Storsjö arm: vp9lpf: Implement the mix2_44 function with one single filter pass For this case, with 8 inputs but only changing 4 of them, we can fit all 16 input pixels into a q register, and still have enough temporary registers for doing the loop filter. The wd=8 filters would require too many temporary registers for processing all 16 pixels at once though. Before: Cortex A7 A8 A9 A53 vp9_loop_filter_mix2_v_44_16_neon: 289.7 256.2 237.5 181.2 After: vp9_loop_filter_mix2_v_44_16_neon: 221.2 150.5 177.7 138.0 Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=575e31e931e4178e9f1e24407503c9b4ec0ef9ba --- libavcodec/arm/vp9dsp_init_arm.c | 7 +- libavcodec/arm/vp9lpf_neon.S | 191 +++ 2 files changed, 195 insertions(+), 3 deletions(-) diff --git a/libavcodec/arm/vp9dsp_init_arm.c b/libavcodec/arm/vp9dsp_init_arm.c index e99d931674..1ede1708e7 100644 --- a/libavcodec/arm/vp9dsp_init_arm.c +++ b/libavcodec/arm/vp9dsp_init_arm.c @@ -194,6 +194,8 @@ define_loop_filters(8, 8); define_loop_filters(16, 8); define_loop_filters(16, 16); +define_loop_filters(44, 16); + #define lf_mix_fn(dir, wd1, wd2, stridea) \ static void loop_filter_##dir##_##wd1##wd2##_16_neon(uint8_t *dst, \ ptrdiff_t stride, \ @@ -207,7 +209,6 @@ static void loop_filter_##dir##_##wd1##wd2##_16_neon(uint8_t *dst, lf_mix_fn(h, wd1, wd2, stride) \ lf_mix_fn(v, wd1, wd2, sizeof(uint8_t)) -lf_mix_fns(4, 4) lf_mix_fns(4, 8) lf_mix_fns(8, 4) lf_mix_fns(8, 8) @@ -227,8 +228,8 @@ static av_cold void vp9dsp_loopfilter_init_arm(VP9DSPContext *dsp) dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_neon; dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_neon; -dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_neon; -dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_neon; +dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_neon; 
+dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_neon; dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_neon; dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_neon; dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_neon; diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S index e31c807cc0..12984a900c 100644 --- a/libavcodec/arm/vp9lpf_neon.S +++ b/libavcodec/arm/vp9lpf_neon.S @@ -44,6 +44,109 @@ vtrn.8 \r2, \r3 .endm +@ The input to and output from this macro is in the registers q8-q15, +@ and q0-q7 are used as scratch registers. +@ p3 = q8, p0 = q11, q0 = q12, q3 = q15 +.macro loop_filter_q +vdup.u8 d0, r2 @ E +lsr r2, r2, #8 +vdup.u8 d2, r3 @ I +lsr r3, r3, #8 +vdup.u8 d1, r2 @ E +vdup.u8 d3, r3 @ I + +vabd.u8 q2, q8, q9 @ abs(p3 - p2) +vabd.u8 q3, q9, q10@ abs(p2 - p1) +vabd.u8 q4, q10, q11@ abs(p1 - p0) +vabd.u8 q5, q12, q13@ abs(q0 - q1) +vabd.u8 q6, q13, q14@ abs(q1 - q2) +vabd.u8 q7, q14, q15@ abs(q2 - q3) +vmax.u8 q2, q2, q3 +vmax.u8 q3, q4, q5 +vmax.u8 q4, q6, q7 +vabd.u8 q5, q11, q12@ abs(p0 - q0) +vmax.u8 q2, q2, q3 +vqadd.u8q5, q5, q5 @ abs(p0 - q0) * 2 +vabd.u8 q7, q10, q13@ abs(p1 - q1) +vmax.u8 q2, q2, q4 @ max(abs(p3 - p2), ..., abs(q2 - q3)) +vshr.u8 q7, q7, #1 +vcle.u8 q2, q2, q1 @ max(abs()) <= I +vqadd.u8q5, q5, q7 @ abs(p0 - q0) * 2 + abs(p1 - q1) >> 1 +vcle.u8 q5, q5, q0 +vandq2, q2, q5 @ fm + +vshrn.u16 d10, q2, #4 +vmovr2, r3, d10 +orrsr2, r2, r3 +@ If no pixels need filtering, just exit as soon as possible +beq 9f + +@ Calculate the normal inner loop filter for 2 or 4 pixels +ldr r3, [sp, #64] +vabd.u8 q3, q10, q11@ abs(p1 - p0) +vabd.u8 q4, q13, q12@ abs(q1 - q0) + +vsubl.u8q5, d20, d26@ p1 - q1 +vsubl.u8q6, d21, d27@ p1 - q1 +vmax.u8 q3, q3, q4 @ max(abs(p1 - p0), abs(q1 - q0)) +
[FFmpeg-cvslog] arm/aarch64: vp9lpf: Keep the comparison to E within 8 bit
ffmpeg | branch: master | Martin Storsjö| Sat Jan 14 20:49:19 2017 +0200| [c582cb8537367721bb399a5d01b652c20142b756] | committer: Martin Storsjö arm/aarch64: vp9lpf: Keep the comparison to E within 8 bit The theoretical maximum value of E is 193, so we can just saturate the addition to 255. Before: Cortex A7 A8 A9 A53 A53/AArch64 vp9_loop_filter_v_4_8_neon: 143.0 127.7 114.888.0 87.7 vp9_loop_filter_v_8_8_neon: 241.0 197.2 173.7 140.0136.7 vp9_loop_filter_v_16_8_neon:497.0 419.5 379.7 293.0275.7 vp9_loop_filter_v_16_16_neon: 965.2 818.7 731.4 579.0452.0 After: vp9_loop_filter_v_4_8_neon: 136.0 125.7 112.684.0 83.0 vp9_loop_filter_v_8_8_neon: 234.0 195.5 171.5 136.0133.7 vp9_loop_filter_v_16_8_neon:490.0 417.5 377.7 289.0271.0 vp9_loop_filter_v_16_16_neon: 951.2 814.7 732.3 571.0446.7 Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c582cb8537367721bb399a5d01b652c20142b756 --- libavcodec/aarch64/vp9lpf_neon.S | 40 +--- libavcodec/arm/vp9lpf_neon.S | 11 +-- 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index 5fafc7ad5c..48cac4cac6 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -51,13 +51,6 @@ // see the arm version instead. -.macro uabdl_sz dst1, dst2, in1, in2, sz -uabdl \dst1, \in1\().8b, \in2\().8b -.ifc \sz, .16b -uabdl2 \dst2, \in1\().16b, \in2\().16b -.endif -.endm - .macro add_sz dst1, dst2, in1, in2, in3, in4, sz add \dst1, \in1, \in3 .ifc \sz, .16b @@ -86,20 +79,6 @@ .endif .endm -.macro cmhs_sz dst1, dst2, in1, in2, in3, in4, sz -cmhs\dst1, \in1, \in3 -.ifc \sz, .16b -cmhs\dst2, \in2, \in4 -.endif -.endm - -.macro xtn_sz dst, in1, in2, sz -xtn \dst\().8b, \in1 -.ifc \sz, .16b -xtn2\dst\().16b, \in2 -.endif -.endm - .macro usubl_sz dst1, dst2, in1, in2, sz usubl \dst1, \in1\().8b, \in2\().8b .ifc \sz, .16b @@ -179,20 +158,20 @@ // tmpq2 == tmp3 + tmp4, etc. 
.macro loop_filter wd, sz, mix, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8 .if \mix == 0 -dup v0.8h, w2// E -dup v1.8h, w2// E +dup v0\sz, w2// E dup v2\sz, w3// I dup v3\sz, w4// H .else -dup v0.8h, w2// E +dup v0.8b, w2// E dup v2.8b, w3// I dup v3.8b, w4// H +lsr w5, w2, #8 lsr w6, w3, #8 lsr w7, w4, #8 -ushrv1.8h, v0.8h, #8 // E +dup v1.8b, w5// E dup v4.8b, w6// I -bic v0.8h, #255, lsl 8 // E dup v5.8b, w7// H +trn1v0.2d, v0.2d, v1.2d trn1v2.2d, v2.2d, v4.2d trn1v3.2d, v3.2d, v5.2d .endif @@ -206,16 +185,15 @@ umaxv4\sz, v4\sz, v5\sz umaxv5\sz, v6\sz, v7\sz umax\tmp1\sz, \tmp1\sz, \tmp2\sz -uabdl_szv6.8h, v7.8h, v23, v24, \sz // abs(p0 - q0) +uabdv6\sz, v23\sz, v24\sz// abs(p0 - q0) umaxv4\sz, v4\sz, v5\sz -add_sz v6.8h, v7.8h, v6.8h, v7.8h, v6.8h, v7.8h, \sz // abs(p0 - q0) * 2 +uqadd v6\sz, v6\sz, v6\sz // abs(p0 - q0) * 2 uabdv5\sz, v22\sz, v25\sz// abs(p1 - q1) umaxv4\sz, v4\sz, \tmp1\sz // max(abs(p3 - p2), ..., abs(q2 - q3)) ushrv5\sz, v5\sz, #1 cmhsv4\sz, v2\sz, v4\sz // max(abs()) <= I -uaddw_szv6.8h, v7.8h, v6.8h, v7.8h, v5, \sz // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1 -cmhs_sz v6.8h, v7.8h, v0.8h, v1.8h, v6.8h, v7.8h, \sz -xtn_sz v5, v6.8h, v7.8h, \sz +uqadd v6\sz, v6\sz, v5\sz // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1 +cmhsv5\sz, v0\sz, v6\sz and v4\sz, v4\sz, v5\sz // fm // If no pixels need filtering, just exit as soon as possible diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S index 1e161e0c63..e31c807cc0 100644 --- a/libavcodec/arm/vp9lpf_neon.S +++ b/libavcodec/arm/vp9lpf_neon.S @@ -51,7 +51,7 @@ @
[FFmpeg-cvslog] aarch64: vp9lpf: Use dup+rev16+uzp1 instead of dup+lsr+dup+trn1
ffmpeg | branch: master | Martin Storsjö| Thu Feb 23 23:33:58 2017 +0200| [3bf9c48320f25f3d5557485b0202f22ae60748b0] | committer: Martin Storsjö aarch64: vp9lpf: Use dup+rev16+uzp1 instead of dup+lsr+dup+trn1 This is one cycle faster in total, and three instructions fewer. Before: vp9_loop_filter_mix2_v_44_16_neon: 123.2 After: vp9_loop_filter_mix2_v_44_16_neon: 122.2 Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3bf9c48320f25f3d5557485b0202f22ae60748b0 --- libavcodec/aarch64/vp9lpf_neon.S | 21 + 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S index 48cac4cac6..e9c497096b 100644 --- a/libavcodec/aarch64/vp9lpf_neon.S +++ b/libavcodec/aarch64/vp9lpf_neon.S @@ -162,18 +162,15 @@ dup v2\sz, w3// I dup v3\sz, w4// H .else -dup v0.8b, w2// E -dup v2.8b, w3// I -dup v3.8b, w4// H -lsr w5, w2, #8 -lsr w6, w3, #8 -lsr w7, w4, #8 -dup v1.8b, w5// E -dup v4.8b, w6// I -dup v5.8b, w7// H -trn1v0.2d, v0.2d, v1.2d -trn1v2.2d, v2.2d, v4.2d -trn1v3.2d, v3.2d, v5.2d +dup v0.8h, w2// E +dup v2.8h, w3// I +dup v3.8h, w4// H +rev16 v1.16b, v0.16b// E +rev16 v4.16b, v2.16b// I +rev16 v5.16b, v3.16b// H +uzp1v0.16b, v0.16b, v1.16b +uzp1v2.16b, v2.16b, v4.16b +uzp1v3.16b, v3.16b, v5.16b .endif uabdv4\sz, v20\sz, v21\sz// abs(p3 - p2) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Place attribute_deprecated in the right position for struct declarations
ffmpeg | branch: master | Diego Biurrun| Wed Feb 22 11:39:21 2017 +0100| [ed6a891c364f8b0850b557d9578b8920cc15a937] | committer: Diego Biurrun Place attribute_deprecated in the right position for struct declarations libavcodec/vaapi.h:58:1: warning: attribute 'deprecated' is ignored, place it after "struct" to apply attribute to type declaration [-Wignored-attributes] > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ed6a891c364f8b0850b557d9578b8920cc15a937 --- libavcodec/vaapi.h | 3 +-- libavcodec/xvmc.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h index ceb7904bea..391368c85f 100644 --- a/libavcodec/vaapi.h +++ b/libavcodec/vaapi.h @@ -55,8 +55,7 @@ * * Deprecated: use AVCodecContext.hw_frames_ctx instead. */ -attribute_deprecated -struct vaapi_context { +struct attribute_deprecated vaapi_context { /** * Window system dependent data * diff --git a/libavcodec/xvmc.h b/libavcodec/xvmc.h index 950ed18276..91027b9c2f 100644 --- a/libavcodec/xvmc.h +++ b/libavcodec/xvmc.h @@ -45,7 +45,7 @@ #define AV_XVMC_ID0x1DC711C0 /**< special value to ensure that regular pixel routines haven't corrupted the struct the number is 1337 speak for the letters IDCT MCo (motion compensation) */ -attribute_deprecated struct xvmc_pix_fmt { +struct attribute_deprecated xvmc_pix_fmt { /** The field contains the special constant value AV_XVMC_ID. It is used as a test that the application correctly uses the API, and that there is no corruption caused by pixel routines. ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Merge commit 'ed6a891c364f8b0850b557d9578b8920cc15a937'
ffmpeg | branch: master | James Almer| Mon Oct 2 16:05:36 2017 -0300| [f40cd7899188cae676ec01ee817ec764381c8403] | committer: James Almer Merge commit 'ed6a891c364f8b0850b557d9578b8920cc15a937' * commit 'ed6a891c364f8b0850b557d9578b8920cc15a937': Place attribute_deprecated in the right position for struct declarations This commit is a noop, see 99530387283fc58e6c3ac42724955b8569daf548 6ff3da4f6a8e460d77bb65fed4267bf48f513fcf Merged-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f40cd7899188cae676ec01ee817ec764381c8403 --- ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Merge commit '04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8'
ffmpeg | branch: master | James Almer| Mon Oct 2 16:00:14 2017 -0300| [0451c3db4ee21afb437279b656c83269677801e5] | committer: James Almer Merge commit '04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8' * commit '04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8': mkv: Update the seek test to match 5d3953a5dc fate: Update fate-lavf-mkv after commit 5d3953a5dc This commit is a noop. Merged-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0451c3db4ee21afb437279b656c83269677801e5 --- ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] mkv: Update the seek test to match 5d3953a5dc
ffmpeg | branch: master | Luca Barbato| Wed Feb 22 09:55:45 2017 +0100| [04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8] | committer: Luca Barbato mkv: Update the seek test to match 5d3953a5dc > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8 --- tests/ref/seek/lavf-mkv | 26 +- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/ref/seek/lavf-mkv b/tests/ref/seek/lavf-mkv index 530c8dfe4d..8267a2a255 100644 --- a/tests/ref/seek/lavf-mkv +++ b/tests/ref/seek/lavf-mkv @@ -1,53 +1,53 @@ -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 1 flags:1 dts: 0.00 pts: 0.00 pos:633 size: 208 ret: 0 st:-1 flags:0 ts:-1.00 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 27837 ret: 0 st:-1 flags:1 ts: 1.894167 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 27834 ret: 0 st: 0 flags:0 ts: 0.788000 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 27834 ret: 0 st: 0 flags:1 ts:-0.317000 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 27837 ret: 0 st: 1 flags:0 ts: 2.577000 ret:-EOF ret: 0 st: 1 flags:1 ts: 1.471000 -ret: 0 st: 1 flags:1 dts: 0.982000 pts: 0.982000 pos: 320112 size: 209 +ret: 0 st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320112 size: 209 ret: 0 st:-1 flags:0 ts: 0.365002 ret: 0 st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146824 size: 27925 ret: 0 st:-1 flags:1 ts:-0.740831 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 27837 ret: 0 st: 0 flags:0 ts: 2.153000 ret:-EOF ret: 0 st: 0 flags:1 ts: 1.048000 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 27834 ret: 0 st: 1 flags:0 ts:-0.058000 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 1 
flags:1 dts: 0.00 pts: 0.00 pos:633 size: 208 ret: 0 st: 1 flags:1 ts: 2.836000 -ret: 0 st: 1 flags:1 dts: 0.982000 pts: 0.982000 pos: 320112 size: 209 +ret: 0 st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320112 size: 209 ret: 0 st:-1 flags:0 ts: 1.730004 ret:-EOF ret: 0 st:-1 flags:1 ts: 0.624171 ret: 0 st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146824 size: 27925 ret: 0 st: 0 flags:0 ts:-0.482000 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 27837 ret: 0 st: 0 flags:1 ts: 2.413000 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 27834 ret: 0 st: 1 flags:0 ts: 1.307000 ret:-EOF ret: 0 st: 1 flags:1 ts: 0.201000 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 1 flags:1 dts: 0.183000 pts: 0.183000 pos: 72204 size: 209 ret: 0 st:-1 flags:0 ts:-0.904994 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 27837 ret: 0 st:-1 flags:1 ts: 1.989173 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 27834 ret: 0 st: 0 flags:0 ts: 0.883000 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 27834 ret: 0 st: 0 flags:1 ts:-0.222000 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 27837 ret: 0 st: 1 flags:0 ts: 2.672000 ret:-EOF ret: 0 st: 1 flags:1 ts: 1.566000 -ret: 0 st: 1 flags:1 dts: 0.982000 pts: 0.982000 pos: 320112 size: 209 +ret: 0 st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320112 size: 209 ret: 0 st:-1 flags:0 ts: 0.460008 ret: 0 st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146824 size: 27925 ret: 0 st:-1 flags:1 ts:-0.645825 -ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size: 208 +ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 27837 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org 
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: Update fate-lavf-mkv after commit 5d3953a5dc
ffmpeg | branch: master | John Stebbins| Tue Feb 21 16:47:20 2017 -0700| [fec3456ce188e895a2082fc1fb298570fc29ad29] | committer: John Stebbins fate: Update fate-lavf-mkv after commit 5d3953a5dc > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fec3456ce188e895a2082fc1fb298570fc29ad29 --- tests/ref/lavf/mkv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ref/lavf/mkv b/tests/ref/lavf/mkv index db0aba0125..04c5f3cac2 100644 --- a/tests/ref/lavf/mkv +++ b/tests/ref/lavf/mkv @@ -1,3 +1,3 @@ -76d400179dfd6143f50ea4d19fe8ed99 *./tests/data/lavf/lavf.mkv +dad336329ef85127f97e9d12a3b57a59 *./tests/data/lavf/lavf.mkv 320383 ./tests/data/lavf/lavf.mkv -./tests/data/lavf/lavf.mkv CRC=0x36193cda +./tests/data/lavf/lavf.mkv CRC=0x63ed3cda ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Merge commit '156bc0193bd47d3f4b3adaa93be0e206e12686ab'
ffmpeg | branch: master | James Almer| Mon Oct 2 15:52:18 2017 -0300| [ff29db4ebf2245a33e184d79ebb294c71d42ea1f] | committer: James Almer Merge commit '156bc0193bd47d3f4b3adaa93be0e206e12686ab' * commit '156bc0193bd47d3f4b3adaa93be0e206e12686ab': fate: Add webp alpha test This commit is a noop, see bd2cec7021a0e275d4551a56d0a220ba2f9bd691 2bdb26b4eeec8142c927645a10b7f55cae5cdcc5 92bf87db294c0544168f572ba6739db7d2ba2ba3 Merged-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ff29db4ebf2245a33e184d79ebb294c71d42ea1f --- ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] fate: Add webp alpha test
ffmpeg | branch: master | Mark Thompson| Fri Feb 17 23:13:14 2017 +0000| [156bc0193bd47d3f4b3adaa93be0e206e12686ab] | committer: Mark Thompson fate: Add webp alpha test > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=156bc0193bd47d3f4b3adaa93be0e206e12686ab --- tests/fate/image.mak | 11 +-- tests/ref/fate/{webp => webp-yuv420p} | 0 tests/ref/fate/webp-yuva420p | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/fate/image.mak b/tests/fate/image.mak index 83fa71a199..7d970312d4 100644 --- a/tests/fate/image.mak +++ b/tests/fate/image.mak @@ -223,8 +223,15 @@ FATE_TIFF-$(call DEMDEC, IMAGE2, TIFF) += $(FATE_TIFF) FATE_SAMPLES_AVCONV += $(FATE_TIFF-yes) fate-tiff: $(FATE_TIFF-yes) -FATE_SAMPLES_AVCONV-$(call DEMDEC, IMAGE2, WEBP) += fate-webp -fate-webp: CMD = framecrc -i $(TARGET_SAMPLES)/webp/image_small.webp +FATE_WEBP += fate-webp-yuv420p +fate-webp-yuv420p: CMD = framecrc -i $(TARGET_SAMPLES)/webp/image_small.webp + +FATE_WEBP += fate-webp-yuva420p +fate-webp-yuva420p: CMD = framecrc -i $(TARGET_SAMPLES)/webp/1_webp_a.webp + +FATE_WEBP-$(call DEMDEC, IMAGE2, WEBP) += $(FATE_WEBP) +FATE_SAMPLES_AVCONV += $(FATE_WEBP-yes) +fate-webp: $(FATE_WEBP-yes) FATE_XBM += fate-xbm10 fate-xbm10: CMD = framecrc -i $(TARGET_SAMPLES)/xbm/xl.xbm diff --git a/tests/ref/fate/webp b/tests/ref/fate/webp-yuv420p similarity index 100% rename from tests/ref/fate/webp rename to tests/ref/fate/webp-yuv420p diff --git a/tests/ref/fate/webp-yuva420p b/tests/ref/fate/webp-yuva420p new file mode 100644 index 0000000000..3350edf2d7 --- /dev/null +++ b/tests/ref/fate/webp-yuva420p @@ -0,0 +1,2 @@ +#tb 0: 1/25 +0, 0, 0,1, 301200, 0x12b071a0 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] matroskaenc: factor ts_offset into block timecode computation
ffmpeg | branch: master | John Stebbins| Wed Feb 15 15:22:40 2017 -0700| [5d3953a5dcfd5f71391b7f34908517eb6f7e5146] | committer: John Stebbins matroskaenc: factor ts_offset into block timecode computation ts_offset was added to cluster timecode, but then effectively subtracted back off the block timecode When setting initial_padding for an audio stream, the timestamps are written incorrectly to the mkv file. cluster timecode gets written as pts0 + ts_offset which is correct, but then block timecode gets written as pts - cluster timecode which expanded is pts - (pts0 + ts_offset). Adding cluster and block tc back together: cluster + block = (pts0 + ts_offset) + (pts - (pts0 + ts_offset)) = pts But the result should be pts + ts_offset since demux will subtract the CodecDelay element from pts and set initial_padding to CodecDelay. This patch gives the correct result. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5d3953a5dcfd5f71391b7f34908517eb6f7e5146 --- libavformat/matroskaenc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index e951a0fb67..2fe6e0ed49 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -1461,6 +1461,7 @@ static void mkv_write_block(AVFormatContext *s, AVIOContext *pb, uint8_t *data = NULL; int offset = 0, size = pkt->size; int64_t ts = mkv->tracks[pkt->stream_index].write_dts ? pkt->dts : pkt->pts; +ts += mkv->tracks[pkt->stream_index].ts_offset; av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, " "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", flags %d\n", ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Merge commit '5d3953a5dcfd5f71391b7f34908517eb6f7e5146'
ffmpeg | branch: master | James Almer| Mon Oct 2 15:26:56 2017 -0300| [45121cbdda00627c9b51b3fd10ea044ebfaa6664] | committer: James Almer Merge commit '5d3953a5dcfd5f71391b7f34908517eb6f7e5146' * commit '5d3953a5dcfd5f71391b7f34908517eb6f7e5146': matroskaenc: factor ts_offset into block timecode computation Merged-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=45121cbdda00627c9b51b3fd10ea044ebfaa6664 --- libavformat/matroskaenc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 17094f82b0..6f094c458c 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -2113,6 +2113,8 @@ static void mkv_write_block(AVFormatContext *s, AVIOContext *pb, uint8_t track_number = (mkv->is_dash ? mkv->dash_track_number : (pkt->stream_index + 1)); ebml_master block_group, block_additions, block_more; +ts += mkv->tracks[pkt->stream_index].ts_offset; + av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, " "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", keyframe %d\n", avio_tell(pb), pkt->size, pkt->pts, pkt->dts, pkt->duration, == diff --cc libavformat/matroskaenc.c index 17094f82b0,2fe6e0ed49..6f094c458c --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@@ -2105,18 -1458,14 +2105,20 @@@ static void mkv_write_block(AVFormatCon { MatroskaMuxContext *mkv = s->priv_data; AVCodecParameters *par = s->streams[pkt->stream_index]->codecpar; -uint8_t *data = NULL; -int offset = 0, size = pkt->size; +uint8_t *data = NULL, *side_data = NULL; +int offset = 0, size = pkt->size, side_data_size = 0; int64_t ts = mkv->tracks[pkt->stream_index].write_dts ? pkt->dts : pkt->pts; +uint64_t additional_id = 0; +int64_t discard_padding = 0; +uint8_t track_number = (mkv->is_dash ? 
mkv->dash_track_number : (pkt->stream_index + 1)); +ebml_master block_group, block_additions, block_more; + + ts += mkv->tracks[pkt->stream_index].ts_offset; + av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, " - "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", flags %d\n", - avio_tell(pb), pkt->size, pkt->pts, pkt->dts, pkt->duration, flags); + "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", keyframe %d\n", + avio_tell(pb), pkt->size, pkt->pts, pkt->dts, pkt->duration, + keyframe != 0); if (par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 && (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)) ff_avc_parse_nal_units_buf(pkt->data, &data, &size); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avdevice/decklink_dec: remove av_dup_packet() usage
ffmpeg | branch: master | James Almer| Mon Oct 2 13:08:39 2017 -0300| [e91f0c4f8b3e81bc63838cc67370a7b13c8d9e78] | committer: James Almer avdevice/decklink_dec: remove av_dup_packet() usage Reviewed-by: Marton Balint Signed-off-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e91f0c4f8b3e81bc63838cc67370a7b13c8d9e78 --- libavdevice/decklink_dec.cpp | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp index 9d12d0fed0..53ff576ec5 100644 --- a/libavdevice/decklink_dec.cpp +++ b/libavdevice/decklink_dec.cpp @@ -450,22 +450,24 @@ static unsigned long long avpacket_queue_size(AVPacketQueue *q) static int avpacket_queue_put(AVPacketQueue *q, AVPacket *pkt) { AVPacketList *pkt1; +int ret; // Drop Packet if queue size is > maximum queue size if (avpacket_queue_size(q) > (uint64_t)q->max_q_size) { av_log(q->avctx, AV_LOG_WARNING, "Decklink input buffer overrun!\n"); return -1; } -/* duplicate the packet */ -if (av_dup_packet(pkt) < 0) { -return -1; -} -pkt1 = (AVPacketList *)av_malloc(sizeof(AVPacketList)); +pkt1 = (AVPacketList *)av_mallocz(sizeof(AVPacketList)); if (!pkt1) { return -1; } -pkt1->pkt = *pkt; +ret = av_packet_ref(&pkt1->pkt, pkt); +av_packet_unref(pkt); +if (ret < 0) { +av_free(pkt1); +return -1; +} pkt1->next = NULL; pthread_mutex_lock(&q->mutex); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avdevice/decklink_dec: use av_packet_add_side_data()
ffmpeg | branch: master | James Almer| Sun Oct 1 23:31:12 2017 -0300| [0c1ffd0aa55c6cef6dffe2b736786c6cb86d8a3d] | committer: James Almer avdevice/decklink_dec: use av_packet_add_side_data() It uses the existing buffer instead of allocating a new one. Reviewed-by: Marton Balint Signed-off-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0c1ffd0aa55c6cef6dffe2b736786c6cb86d8a3d --- libavdevice/decklink_dec.cpp | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp index 8a14094474..9d12d0fed0 100644 --- a/libavdevice/decklink_dec.cpp +++ b/libavdevice/decklink_dec.cpp @@ -390,10 +390,8 @@ uint8_t *get_metadata(AVFormatContext *avctx, uint16_t *buf, size_t width, clear_parity_bits(buf, len); data = vanc_to_cc(avctx, buf, width, data_len); if (data) { -uint8_t *pkt_cc = av_packet_new_side_data(pkt, AV_PKT_DATA_A53_CC, data_len); -if (pkt_cc) -memcpy(pkt_cc, data, data_len); -av_free(data); +if (av_packet_add_side_data(pkt, AV_PKT_DATA_A53_CC, data, data_len) < 0) +av_free(data); } } else { av_log(avctx, AV_LOG_DEBUG, "Unknown meta data DID = 0x%.2x SDID = 0x%.2x\n", ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/encode: remove usage of av_dup_packet()
ffmpeg | branch: master | James Almer| Tue Sep 26 00:24:29 2017 -0300| [a22c6a4796ca1f2cbee6784262515da876fbec22] | committer: James Almer avcodec/encode: remove usage of av_dup_packet() Reviewed-by: wm4 Signed-off-by: James Almer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a22c6a4796ca1f2cbee6784262515da876fbec22 --- libavcodec/encode.c | 20 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/libavcodec/encode.c b/libavcodec/encode.c index 525ee1f5d6..dd50486bcf 100644 --- a/libavcodec/encode.c +++ b/libavcodec/encode.c @@ -222,10 +222,12 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx, } avpkt->buf = user_pkt.buf; avpkt->data = user_pkt.data; -} else { -if (av_dup_packet(avpkt) < 0) { -ret = AVERROR(ENOMEM); -} +} else if (!avpkt->buf) { +AVPacket tmp = { 0 }; +ret = av_packet_ref(&tmp, avpkt); +if (ret < 0) +return ret; +*avpkt = tmp; } } @@ -318,10 +320,12 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx, } avpkt->buf = user_pkt.buf; avpkt->data = user_pkt.data; -} else { -if (av_dup_packet(avpkt) < 0) { -ret = AVERROR(ENOMEM); -} +} else if (!avpkt->buf) { +AVPacket tmp = { 0 }; +ret = av_packet_ref(&tmp, avpkt); +if (ret < 0) +return ret; +*avpkt = tmp; } } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog