[FFmpeg-cvslog] fate: disable fate-svq3-2

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 21:40:59 
2017 -0300| [aa4fe27657462742943dfbd185a18c223ae4dca3] | committer: James Almer

fate: disable fate-svq3-2

The first frame changes depending on --enable-memory-poisoning being
used to configure ffmpeg or not, even if requesting bitexact decoding.
Disable the test until this is fixed.

Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aa4fe27657462742943dfbd185a18c223ae4dca3
---

 tests/fate/qt.mak | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/fate/qt.mak b/tests/fate/qt.mak
index 2a7fc2e0fa..c054129f08 100644
--- a/tests/fate/qt.mak
+++ b/tests/fate/qt.mak
@@ -52,7 +52,8 @@ fate-svq1-headerswap: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq1/ct_ending_cut.mov
 FATE_SVQ3 += fate-svq3-1
 fate-svq3-1: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an
 
-FATE_SVQ3 += fate-svq3-2
+#FATE_SVQ3 += fate-svq3-2
+#FIXME: first frame changes depending on --enable-memory-poisoning being used 
to configure or not
 fate-svq3-2: CMD = framecrc -flags +bitexact -ignore_editlist 1 -i 
$(TARGET_SAMPLES)/svq3/svq3_decoding_regression.mov -an
 
 FATE_SVQ3 += fate-svq3-watermark

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/encode: free non-referenced packets' side data in the old encode API functions

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 18:58:39 
2017 -0300| [712ee85816ef854761f30ea57ea628997bd62e60] | committer: James Almer

avcodec/encode: free non-referenced packets' side data in the old encode API 
functions

Fixes memleaks introduced by a22c6a4796ca1f2cbee6784262515da876fbec22.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=712ee85816ef854761f30ea57ea628997bd62e60
---

 libavcodec/encode.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/encode.c b/libavcodec/encode.c
index dd50486bcf..c152228c92 100644
--- a/libavcodec/encode.c
+++ b/libavcodec/encode.c
@@ -227,6 +227,7 @@ int attribute_align_arg 
avcodec_encode_audio2(AVCodecContext *avctx,
 ret = av_packet_ref(&tmp, avpkt);
 if (ret < 0)
 return ret;
+av_packet_unref(avpkt);
 *avpkt = tmp;
 }
 }
@@ -325,6 +326,7 @@ int attribute_align_arg 
avcodec_encode_video2(AVCodecContext *avctx,
 ret = av_packet_ref(&tmp, avpkt);
 if (ret < 0)
 return ret;
+av_packet_unref(avpkt);
 *avpkt = tmp;
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Merge commit '8e4d4efc67e154fdffd65964a7cfeef740320827'

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 18:23:48 
2017 -0300| [b591329c3afe445c45eaecadd5fe3b80a837ee2f] | committer: James Almer

Merge commit '8e4d4efc67e154fdffd65964a7cfeef740320827'

* commit '8e4d4efc67e154fdffd65964a7cfeef740320827':
  fate: Add another SVQ3 test to increase coverage

Also included a fix from da8093f712d625db7ce4a2526fb52994e01921ec.

The demuxer option "-ignore_editlist 1" is temporarily added to the
test as well, to work around a regression in the edit list mov parsing
code.

Merged-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b591329c3afe445c45eaecadd5fe3b80a837ee2f
---

 tests/fate/qt.mak   | 12 ++--
 tests/ref/fate/{svq3 => svq3-1} |  0
 tests/ref/fate/svq3-2   | 24 
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/tests/fate/qt.mak b/tests/fate/qt.mak
index 335ec44dc2..2a7fc2e0fa 100644
--- a/tests/fate/qt.mak
+++ b/tests/fate/qt.mak
@@ -49,10 +49,18 @@ fate-svq1: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq1/marymary-shackles.mov -an -t
 FATE_QT-$(call DEMDEC, MOV, SVQ1) += fate-svq1-headerswap
 fate-svq1-headerswap: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq1/ct_ending_cut.mov -frames 4
 
-FATE_QT-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += fate-svq3 
fate-svq3-watermark
-fate-svq3: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an
+FATE_SVQ3 += fate-svq3-1
+fate-svq3-1: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an
+
+FATE_SVQ3 += fate-svq3-2
+fate-svq3-2: CMD = framecrc -flags +bitexact -ignore_editlist 1 -i 
$(TARGET_SAMPLES)/svq3/svq3_decoding_regression.mov -an
+
+FATE_SVQ3 += fate-svq3-watermark
 fate-svq3-watermark: CMD = framecrc -flags +bitexact -i 
$(TARGET_SAMPLES)/svq3/svq3_watermark.mov
 
+FATE_QT-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += $(FATE_SVQ3)
+fate-svq3: $(FATE_SVQ3)
+
 FATE_QT += $(FATE_QT-yes)
 
 FATE_SAMPLES_FFMPEG += $(FATE_QT)
diff --git a/tests/ref/fate/svq3 b/tests/ref/fate/svq3-1
similarity index 100%
rename from tests/ref/fate/svq3
rename to tests/ref/fate/svq3-1
diff --git a/tests/ref/fate/svq3-2 b/tests/ref/fate/svq3-2
new file mode 100644
index 0000000000..1d89c05971
--- /dev/null
+++ b/tests/ref/fate/svq3-2
@@ -0,0 +1,24 @@
+#tb 0: 1/24
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 480x257
+#sar 0: 0/1
+0,  1,  1,1,   185280, 0x044209c4
+0,  2,  2,1,   185280, 0x427ef9a7
+0,  3,  3,1,   185280, 0x8f771cc8
+0,  4,  4,1,   185280, 0xb40d0e52
+0,  5,  5,1,   185280, 0x2e6ee461
+0,  6,  6,1,   185280, 0x681ba513
+0,  7,  7,1,   185280, 0x998c5676
+0,  8,  8,1,   185280, 0xf91003ec
+0,  9,  9,1,   185280, 0x322ed3de
+0, 10, 10,1,   185280, 0xb1c9370a
+0, 11, 11,1,   185280, 0x41423b36
+0, 12, 12,1,   185280, 0x0b9284e4
+0, 13, 13,1,   185280, 0x185789b2
+0, 14, 14,1,   185280, 0x8f0bece0
+0, 15, 15,1,   185280, 0xfcd9450e
+0, 16, 16,1,   185280, 0x509d868b
+0, 17, 17,1,   185280, 0x71fd9ae3
+0, 18, 18,1,   185280, 0x3dad1b3c
+0, 19, 19,1,   185280, 0x69ba37dd


==

diff --cc tests/fate/qt.mak
index 335ec44dc2,761db8d234..2a7fc2e0fa
--- a/tests/fate/qt.mak
+++ b/tests/fate/qt.mak
@@@ -7,53 -7,53 +7,61 @@@ fate-qdm2: CMP = oneof
  fate-qdm2: REF = $(SAMPLES)/qt-surge-suite/surge-2-16-B-QDM2.pcm
  fate-qdm2: FUZZ = 2
  
 -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-mono
 +FATE_QT-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-mono
  fate-qt-alaw-mono: CMD = md5 -i 
$(TARGET_SAMPLES)/qt-surge-suite/surge-1-16-B-alaw.mov -f s16le
  
 -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-stereo
 +FATE_QT-$(call DEMDEC, MOV, PCM_ALAW) += fate-qt-alaw-stereo
  fate-qt-alaw-stereo: CMD = md5 -i 
$(TARGET_SAMPLES)/qt-surge-suite/surge-2-16-B-alaw.mov -f s16le
  
 -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, ADPCM_IMA_QT) += fate-qt-ima4-mono
 +FATE_QT-$(call DEMDEC, MOV, ADPCM_IMA_QT) += fate-qt-ima4-mono
  fate-qt-ima4-mono: CMD = md5 -i 
$(TARGET_SAMPLES)/qt-surge-suite/surge-1-16-B-ima4.mov -f s16le
  
 -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, ADPCM_IMA_QT) += fate-qt-ima4-stereo
 +FATE_QT-$(call DEMDEC, MOV, ADPCM_IMA_QT) += fate-qt-ima4-stereo
  fate-qt-ima4-stereo: CMD = md5 -i 
$(TARGET_SAMPLES)/qt-surge-suite/surge-2-16-B-ima4.mov -f s16le
  
 -FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, MACE3) += fate-qt-mac3-mono
 +FATE_QT-$(call DEMDEC, MOV, MACE3) += fate-qt-mac3-mono
  

[FFmpeg-cvslog] fate: Add another SVQ3 test to increase coverage

2017-10-02 Thread Diego Biurrun
ffmpeg | branch: master | Diego Biurrun  | Sat Apr  6 
12:48:32 2013 +0200| [8e4d4efc67e154fdffd65964a7cfeef740320827] | committer: 
Diego Biurrun

fate: Add another SVQ3 test to increase coverage

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8e4d4efc67e154fdffd65964a7cfeef740320827
---

 tests/fate/qt.mak   | 10 --
 tests/ref/fate/{svq3 => svq3-1} |  0
 tests/ref/fate/svq3-2   | 20 
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/tests/fate/qt.mak b/tests/fate/qt.mak
index 97537f93ef..761db8d234 100644
--- a/tests/fate/qt.mak
+++ b/tests/fate/qt.mak
@@ -49,5 +49,11 @@ fate-svq1: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq1/marymary-shackles.mov -an -t
 FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, SVQ1) += fate-svq1-headerswap
 fate-svq1-headerswap: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq1/ct_ending_cut.mov -frames 4
 
-FATE_SAMPLES_AVCONV-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += fate-svq3
-fate-svq3: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an
+FATE_SVQ3 += fate-svq3-1
+fate-svq3-1: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq3/Vertical400kbit.sorenson3.mov -t 6 -an
+
+FATE_SVQ3 += fate-svq3-2
+fate-svq3-2: CMD = framecrc -i 
$(TARGET_SAMPLES)/svq3/svq3_decoding_regression.mov -an
+
+FATE_SAMPLES_AVCONV-$(call ALLYES, MOV_DEMUXER SVQ3_DECODER ZLIB) += 
$(FATE_SVQ3)
+fate-svq3: $(FATE_SVQ3)
diff --git a/tests/ref/fate/svq3 b/tests/ref/fate/svq3-1
similarity index 100%
rename from tests/ref/fate/svq3
rename to tests/ref/fate/svq3-1
diff --git a/tests/ref/fate/svq3-2 b/tests/ref/fate/svq3-2
new file mode 100644
index 0000000000..7e69b31b01
--- /dev/null
+++ b/tests/ref/fate/svq3-2
@@ -0,0 +1,20 @@
+#tb 0: 1/19200
+0, -2, -2,0,   185280, 0x061c0d85
+0, -19200, -19200,0,   185280, 0x427ef9a7
+0, -18400, -18400,0,   185280, 0x8f495d37
+0, -17600, -17600,0,   185280, 0x5bfd0e5b
+0, -16800, -16800,0,   185280, 0x60d12d25
+0, -16000, -16000,0,   185280, 0x25aaa51b
+0, -15200, -15200,0,   185280, 0x9cf58bf0
+0, -14400, -14400,0,   185280, 0xd9bd03ea
+0, -13600, -13600,0,   185280, 0xd18be732
+0, -12800, -12800,0,   185280, 0x92763708
+0, -12000, -12000,0,   185280, 0x94b5784a
+0, -11200, -11200,0,   185280, 0x32b184c9
+0, -10400, -10400,0,   185280, 0xe316fec3
+0,  -9600,  -9600,0,   185280, 0x6344ec88
+0,  -8800,  -8800,0,   185280, 0xe0aa6de4
+0,  -8000,  -8000,0,   185280, 0x6cfc8687
+0,  -7200,  -7200,0,   185280, 0x26ddc189
+0,  -6400,  -6400,0,   185280, 0x5a0c1b38
+0,  -5600,  -5600,0,   185280, 0x79a88cb9

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: vp9itxfm: Reorder iadst16 coeffs

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Sat Dec 31 
22:27:13 2016 +0200| [08074c092d8c97d71c5986e5325e97ffc956119d] | committer: 
Martin Storsjö

arm: vp9itxfm: Reorder iadst16 coeffs

This matches the order they are in the 16 bpp version.

There they are in this order, to make sure we access them in the
same order they are declared, easing loading only half of the
coefficients at a time.

This makes the 8 bpp version match the 16 bpp version better.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=08074c092d8c97d71c5986e5325e97ffc956119d
---

 libavcodec/arm/vp9itxfm_neon.S | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index 1d4d6a7910..a612b25f4f 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -37,8 +37,8 @@ idct_coeffs:
 endconst
 
 const iadst16_coeffs, align=4
-.short  16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
-.short  11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
+.short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
+.short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
 endconst
 
 @ Do four 4x4 transposes, using q registers for the subtransposes that don't
@@ -678,19 +678,19 @@ function iadst16
 vld1.16 {q0-q1}, [r12,:128]
 
 mbutterfly_l    q3,  q2,  d31, d16, d0[1], d0[0] @ q3  = t1,   q2  = t0
-mbutterfly_l    q5,  q4,  d23, d24, d2[1], d2[0] @ q5  = t9,   q4  = t8
+mbutterfly_l    q5,  q4,  d23, d24, d1[1], d1[0] @ q5  = t9,   q4  = t8
 butterfly_n d31, d24, q3,  q5,  q6,  q5  @ d31 = t1a,  d24 = 
t9a
 mbutterfly_l    q7,  q6,  d29, d18, d0[3], d0[2] @ q7  = t3,   q6  = t2
 butterfly_n d16, d23, q2,  q4,  q3,  q4  @ d16 = t0a,  d23 = 
t8a
 
-mbutterfly_l    q3,  q2,  d21, d26, d2[3], d2[2] @ q3  = t11,  q2  = t10
+mbutterfly_l    q3,  q2,  d21, d26, d1[3], d1[2] @ q3  = t11,  q2  = t10
 butterfly_n d29, d26, q7,  q3,  q4,  q3  @ d29 = t3a,  d26 = 
t11a
-mbutterfly_l    q5,  q4,  d27, d20, d1[1], d1[0] @ q5  = t5,   q4  = t4
+mbutterfly_l    q5,  q4,  d27, d20, d2[1], d2[0] @ q5  = t5,   q4  = t4
 butterfly_n d18, d21, q6,  q2,  q3,  q2  @ d18 = t2a,  d21 = 
t10a
 
 mbutterfly_l    q7,  q6,  d19, d28, d3[1], d3[0] @ q7  = t13,  q6  = t12
 butterfly_n d20, d28, q5,  q7,  q2,  q7  @ d20 = t5a,  d28 = 
t13a
-mbutterfly_l    q3,  q2,  d25, d22, d1[3], d1[2] @ q3  = t7,   q2  = t6
+mbutterfly_l    q3,  q2,  d25, d22, d2[3], d2[2] @ q3  = t7,   q2  = t6
 butterfly_n d27, d19, q4,  q6,  q5,  q6  @ d27 = t4a,  d19 = 
t12a
 
 mbutterfly_l    q5,  q4,  d17, d30, d3[3], d3[2] @ q5  = t15,  q4  = t14

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Merge commit 'b8f66c0838b4c645227f23a35b4d54373da4c60a'

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 16:13:01 
2017 -0300| [9f5d238a66eab835a3137fbf014b7d85f3172cd7] | committer: James Almer

Merge commit 'b8f66c0838b4c645227f23a35b4d54373da4c60a'

* commit 'b8f66c0838b4c645227f23a35b4d54373da4c60a':
  aarch64: vp9itxfm: Reorder iadst16 coeffs
  arm: vp9itxfm: Reorder iadst16 coeffs
  aarch64: vp9itxfm: Reorder the idct coefficients for better pairing
  arm: vp9itxfm: Reorder the idct coefficients for better pairing
  aarch64: vp9itxfm: Avoid reloading the idct32 coefficients
  arm: vp9itxfm: Avoid reloading the idct32 coefficients
  arm: vp9lpf: Implement the mix2_44 function with one single filter pass
  aarch64: vp9lpf: Use dup+rev16+uzp1 instead of dup+lsr+dup+trn1
  arm/aarch64: vp9lpf: Keep the comparison to E within 8 bit

This commit is a noop, see
3fbbad29847c79f422128ad88f174c53a5f6c449
f32690a298badbf2df66319e9b38236ad3d3e321
a88db8b9a016fe47997029e3653cdac4777994b4
600f4c9b03b8d39b986a00dd9dafa61be7d86a72
2905657b902fea8718434f0d29056cf4e7434307
4f693b56bdcfda37b4f2c48b39dcf12439c149c8
f952273019984da5e7bfa1298e1cdb0683049296
b2e20d89844b51c3d9565b293606d1433bd67f25
26ee83acc4ebd765529b666c7f050243b7677d76

Merged-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9f5d238a66eab835a3137fbf014b7d85f3172cd7
---



___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] aarch64: vp9itxfm: Reorder the idct coefficients for better pairing

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Sat Dec 31 
14:18:31 2016 +0200| [09eb88a12e008d10a3f7a6be75d18ad98b368e68] | committer: 
Martin Storsjö

aarch64: vp9itxfm: Reorder the idct coefficients for better pairing

All elements are used pairwise, except for the first one.
Previously, the 16th element was unused. Move the unused element
to the second slot, to make the later element pairs not split
across registers.

This simplifies loading only parts of the coefficients,
reducing the difference to the 16 bpp version.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=09eb88a12e008d10a3f7a6be75d18ad98b368e68
---

 libavcodec/aarch64/vp9itxfm_neon.S | 124 ++---
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/libavcodec/aarch64/vp9itxfm_neon.S 
b/libavcodec/aarch64/vp9itxfm_neon.S
index b6c2575236..d4fc2163aa 100644
--- a/libavcodec/aarch64/vp9itxfm_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_neon.S
@@ -22,7 +22,7 @@
 #include "neon.S"
 
 const itxfm4_coeffs, align=4
-.short  11585, 6270, 15137, 0
+.short  11585, 0, 6270, 15137
 iadst4_coeffs:
 .short  5283, 15212, 9929, 13377
 endconst
@@ -30,8 +30,8 @@ endconst
 const iadst8_coeffs, align=4
 .short  16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679
 idct_coeffs:
-.short  11585, 6270, 15137, 3196, 16069, 13623, 9102, 1606
-.short  16305, 12665, 10394, 7723, 14449, 15679, 4756, 0
+.short  11585, 0, 6270, 15137, 3196, 16069, 13623, 9102
+.short  1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756
 .short  804, 16364, 12140, 11003, 7005, 14811, 15426, 5520
 .short  3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404
 endconst
@@ -192,14 +192,14 @@ endconst
 .endm
 
 .macro idct4 c0, c1, c2, c3
-smull   v22.4s,\c1\().4h, v0.h[2]
-smull   v20.4s,\c1\().4h, v0.h[1]
+smull   v22.4s,\c1\().4h, v0.h[3]
+smull   v20.4s,\c1\().4h, v0.h[2]
 add v16.4h,\c0\().4h, \c2\().4h
 sub v17.4h,\c0\().4h, \c2\().4h
-smlal   v22.4s,\c3\().4h, v0.h[1]
+smlal   v22.4s,\c3\().4h, v0.h[2]
 smull   v18.4s,v16.4h,v0.h[0]
 smull   v19.4s,v17.4h,v0.h[0]
-smlsl   v20.4s,\c3\().4h, v0.h[2]
+smlsl   v20.4s,\c3\().4h, v0.h[3]
 rshrn   v22.4h,v22.4s,#14
 rshrn   v18.4h,v18.4s,#14
 rshrn   v19.4h,v19.4s,#14
@@ -326,9 +326,9 @@ itxfm_func4x4 iwht,  iwht
 
 .macro idct8
 dmbutterfly0    v16, v20, v16, v20, v2, v3, v4, v5, v6, v7 // v16 = t0a, v20 = t1a
-dmbutterfly v18, v22, v0.h[1], v0.h[2], v2, v3, v4, v5 // v18 = 
t2a, v22 = t3a
-dmbutterfly v17, v23, v0.h[3], v0.h[4], v2, v3, v4, v5 // v17 = 
t4a, v23 = t7a
-dmbutterfly v21, v19, v0.h[5], v0.h[6], v2, v3, v4, v5 // v21 = 
t5a, v19 = t6a
+dmbutterfly v18, v22, v0.h[2], v0.h[3], v2, v3, v4, v5 // v18 = 
t2a, v22 = t3a
+dmbutterfly v17, v23, v0.h[4], v0.h[5], v2, v3, v4, v5 // v17 = 
t4a, v23 = t7a
+dmbutterfly v21, v19, v0.h[6], v0.h[7], v2, v3, v4, v5 // v21 = 
t5a, v19 = t6a
 
 butterfly_8h    v24, v25, v16, v22 // v24 = t0, v25 = t3
 butterfly_8h    v28, v29, v17, v21 // v28 = t4, v29 = t5a
@@ -361,8 +361,8 @@ itxfm_func4x4 iwht,  iwht
 dmbutterfly0    v19, v20, v6, v7, v24, v26, v27, v28, v29, v30   // v19 = -out[3], v20 = out[4]
 neg v19.8h,   v19.8h  // v19 = out[3]
 
-dmbutterfly_l   v26, v27, v28, v29, v5,  v3,  v0.h[1], v0.h[2]   // 
v26,v27 = t5a, v28,v29 = t4a
-dmbutterfly_l   v2,  v3,  v4,  v5,  v31, v25, v0.h[2], v0.h[1]   // 
v2,v3   = t6a, v4,v5   = t7a
+dmbutterfly_l   v26, v27, v28, v29, v5,  v3,  v0.h[2], v0.h[3]   // 
v26,v27 = t5a, v28,v29 = t4a
+dmbutterfly_l   v2,  v3,  v4,  v5,  v31, v25, v0.h[3], v0.h[2]   // 
v2,v3   = t6a, v4,v5   = t7a
 
 dbutterfly_n    v17, v30, v28, v29, v2,  v3,  v6,  v7,  v24, v25 // v17 = -out[1], v30 = t6
 dbutterfly_n    v22, v31, v26, v27, v4,  v5,  v6,  v7,  v24, v25 // v22 = out[6],  v31 = t7
@@ -543,13 +543,13 @@ endfunc
 
 function idct16
 dmbutterfly0    v16, v24, v16, v24, v2, v3, v4, v5, v6, v7 // v16 = t0a,  v24 = t1a
-dmbutterfly v20, v28, v0.h[1], v0.h[2], v2, v3, v4, v5 // v20 = 
t2a,  v28 = t3a
-dmbutterfly v18, v30, v0.h[3], v0.h[4], v2, v3, v4, v5 // v18 = 
t4a,  v30 = t7a
-dmbutterfly v26, v22, v0.h[5], v0.h[6], v2, v3, v4, v5 // v26 = 
t5a,  v22 = t6a
-dmbutterfly v17, v31, v0.h[7], v1.h[0], v2, v3, v4, v5 // v17 = 
t8a,  v31 = t15a
-dmbutterfly v25, v23, v1.h[1], v1.h[2], v2, v3, v4, v5 // v25 = 
t9a,  v23 = 

[FFmpeg-cvslog] aarch64: vp9itxfm: Reorder iadst16 coeffs

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Sat Dec 31 
22:27:13 2016 +0200| [b8f66c0838b4c645227f23a35b4d54373da4c60a] | committer: 
Martin Storsjö

aarch64: vp9itxfm: Reorder iadst16 coeffs

This matches the order they are in the 16 bpp version.

There they are in this order, to make sure we access them in the
same order they are declared, easing loading only half of the
coefficients at a time.

This makes the 8 bpp version match the 16 bpp version better.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b8f66c0838b4c645227f23a35b4d54373da4c60a
---

 libavcodec/aarch64/vp9itxfm_neon.S | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/aarch64/vp9itxfm_neon.S 
b/libavcodec/aarch64/vp9itxfm_neon.S
index d4fc2163aa..93dc736f01 100644
--- a/libavcodec/aarch64/vp9itxfm_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_neon.S
@@ -37,8 +37,8 @@ idct_coeffs:
 endconst
 
 const iadst16_coeffs, align=4
-.short  16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
-.short  11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
+.short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
+.short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
 endconst
 
 // out1 = ((in1 + in2) * v0[0] + (1 << 13)) >> 14
@@ -628,19 +628,19 @@ function iadst16
 ld1 {v0.8h,v1.8h}, [x11]
 
 dmbutterfly_l   v6,  v7,  v4,  v5,  v31, v16, v0.h[1], v0.h[0]   // 
v6,v7   = t1,   v4,v5   = t0
-dmbutterfly_l   v10, v11, v8,  v9,  v23, v24, v1.h[1], v1.h[0]   // 
v10,v11 = t9,   v8,v9   = t8
+dmbutterfly_l   v10, v11, v8,  v9,  v23, v24, v0.h[5], v0.h[4]   // 
v10,v11 = t9,   v8,v9   = t8
 dbutterfly_n    v31, v24, v6,  v7,  v10, v11, v12, v13, v10, v11 // v31 = t1a,  v24 = t9a
 dmbutterfly_l   v14, v15, v12, v13, v29, v18, v0.h[3], v0.h[2]   // 
v14,v15 = t3,   v12,v13 = t2
 dbutterfly_n    v16, v23, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v16 = t0a,  v23 = t8a
 
-dmbutterfly_l   v6,  v7,  v4,  v5,  v21, v26, v1.h[3], v1.h[2]   // 
v6,v7   = t11,  v4,v5   = t10
+dmbutterfly_l   v6,  v7,  v4,  v5,  v21, v26, v0.h[7], v0.h[6]   // 
v6,v7   = t11,  v4,v5   = t10
 dbutterfly_n    v29, v26, v14, v15, v6,  v7,  v8,  v9,  v6,  v7  // v29 = t3a,  v26 = t11a
-dmbutterfly_l   v10, v11, v8,  v9,  v27, v20, v0.h[5], v0.h[4]   // 
v10,v11 = t5,   v8,v9   = t4
+dmbutterfly_l   v10, v11, v8,  v9,  v27, v20, v1.h[1], v1.h[0]   // 
v10,v11 = t5,   v8,v9   = t4
 dbutterfly_n    v18, v21, v12, v13, v4,  v5,  v6,  v7,  v4,  v5  // v18 = t2a,  v21 = t10a
 
 dmbutterfly_l   v14, v15, v12, v13, v19, v28, v1.h[5], v1.h[4]   // 
v14,v15 = t13,  v12,v13 = t12
 dbutterfly_n    v20, v28, v10, v11, v14, v15, v4,  v5,  v14, v15 // v20 = t5a,  v28 = t13a
-dmbutterfly_l   v6,  v7,  v4,  v5,  v25, v22, v0.h[7], v0.h[6]   // 
v6,v7   = t7,   v4,v5   = t6
+dmbutterfly_l   v6,  v7,  v4,  v5,  v25, v22, v1.h[3], v1.h[2]   // 
v6,v7   = t7,   v4,v5   = t6
 dbutterfly_n    v27, v19, v8,  v9,  v12, v13, v10, v11, v12, v13 // v27 = t4a,  v19 = t12a
 
 dmbutterfly_l   v10, v11, v8,  v9,  v17, v30, v1.h[7], v1.h[6]   // 
v10,v11 = t15,  v8,v9   = t14

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] arm: vp9itxfm: Reorder the idct coefficients for better pairing

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Sat Dec 31 
14:05:44 2016 +0200| [de06bdfe6c8abd8266d5c6f5c68e4df0060b61fc] | committer: 
Martin Storsjö

arm: vp9itxfm: Reorder the idct coefficients for better pairing

All elements are used pairwise, except for the first one.
Previously, the 16th element was unused. Move the unused element
to the second slot, to make the later element pairs not split
across registers.

This simplifies loading only parts of the coefficients,
reducing the difference to the 16 bpp version.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=de06bdfe6c8abd8266d5c6f5c68e4df0060b61fc
---

 libavcodec/arm/vp9itxfm_neon.S | 124 -
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index bed502eba2..1d4d6a7910 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -22,7 +22,7 @@
 #include "neon.S"
 
 const itxfm4_coeffs, align=4
-.short  11585, 6270, 15137, 0
+.short  11585, 0, 6270, 15137
 iadst4_coeffs:
 .short  5283, 15212, 9929, 13377
 endconst
@@ -30,8 +30,8 @@ endconst
 const iadst8_coeffs, align=4
 .short  16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679
 idct_coeffs:
-.short  11585, 6270, 15137, 3196, 16069, 13623, 9102, 1606
-.short  16305, 12665, 10394, 7723, 14449, 15679, 4756, 0
+.short  11585, 0, 6270, 15137, 3196, 16069, 13623, 9102
+.short  1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756
 .short  804, 16364, 12140, 11003, 7005, 14811, 15426, 5520
 .short  3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404
 endconst
@@ -224,14 +224,14 @@ endconst
 .endm
 
 .macro idct4 c0, c1, c2, c3
-vmull.s16   q13,  \c1,  d0[2]
-vmull.s16   q11,  \c1,  d0[1]
+vmull.s16   q13,  \c1,  d0[3]
+vmull.s16   q11,  \c1,  d0[2]
 vadd.i16        d16,  \c0,  \c2
 vsub.i16        d17,  \c0,  \c2
-vmlal.s16   q13,  \c3,  d0[1]
+vmlal.s16   q13,  \c3,  d0[2]
 vmull.s16   q9,   d16,  d0[0]
 vmull.s16   q10,  d17,  d0[0]
-vmlsl.s16   q11,  \c3,  d0[2]
+vmlsl.s16   q11,  \c3,  d0[3]
 vrshrn.s32  d26,  q13,  #14
 vrshrn.s32  d18,  q9,   #14
 vrshrn.s32  d20,  q10,  #14
@@ -350,9 +350,9 @@ itxfm_func4x4 iwht,  iwht
 
 .macro idct8
 dmbutterfly0    d16, d17, d24, d25, q8,  q12, q2, q4, d4, d5, d8, d9, q3, q2, q5, q4 @ q8 = t0a, q12 = t1a
-dmbutterfly d20, d21, d28, d29, d0[1], d0[2], q2,  q3,  q4,  q5 @ 
q10 = t2a, q14 = t3a
-dmbutterfly d18, d19, d30, d31, d0[3], d1[0], q2,  q3,  q4,  q5 @ 
q9  = t4a, q15 = t7a
-dmbutterfly d26, d27, d22, d23, d1[1], d1[2], q2,  q3,  q4,  q5 @ 
q13 = t5a, q11 = t6a
+dmbutterfly d20, d21, d28, d29, d0[2], d0[3], q2,  q3,  q4,  q5 @ 
q10 = t2a, q14 = t3a
+dmbutterfly d18, d19, d30, d31, d1[0], d1[1], q2,  q3,  q4,  q5 @ 
q9  = t4a, q15 = t7a
+dmbutterfly d26, d27, d22, d23, d1[2], d1[3], q2,  q3,  q4,  q5 @ 
q13 = t5a, q11 = t6a
 
 butterfly   q2,  q14, q8,  q14 @ q2 = t0, q14 = t3
 butterfly   q3,  q10, q12, q10 @ q3 = t1, q10 = t2
@@ -386,8 +386,8 @@ itxfm_func4x4 iwht,  iwht
 vneg.s16        q15, q15  @ q15 = out[7]
 butterfly   q8,  q9,  q11, q9 @ q8 = out[0], q9 = t2
 
-dmbutterfly_l   q10, q11, q5,  q7,  d4,  d5,  d6,  d7,  d0[1], d0[2] @ 
q10,q11 = t5a, q5,q7 = t4a
-dmbutterfly_l   q2,  q3,  q13, q14, d12, d13, d8,  d9,  d0[2], d0[1] @ 
q2,q3 = t6a, q13,q14 = t7a
+dmbutterfly_l   q10, q11, q5,  q7,  d4,  d5,  d6,  d7,  d0[2], d0[3] @ 
q10,q11 = t5a, q5,q7 = t4a
+dmbutterfly_l   q2,  q3,  q13, q14, d12, d13, d8,  d9,  d0[3], d0[2] @ 
q2,q3 = t6a, q13,q14 = t7a
 
 dbutterfly_n    d28, d29, d8,  d9,  q10, q11, q13, q14, q4,  q6,  q10, q11 @ q14 = out[6], q4 = t7
 
@@ -594,13 +594,13 @@ endfunc
 
 function idct16
 mbutterfly0 d16, d24, d16, d24, d4, d6,  q2,  q3 @ d16 = t0a,  d24 
= t1a
-mbutterfly  d20, d28, d0[1], d0[2], q2,  q3  @ d20 = t2a,  d28 = 
t3a
-mbutterfly  d18, d30, d0[3], d1[0], q2,  q3  @ d18 = t4a,  d30 = 
t7a
-mbutterfly  d26, d22, d1[1], d1[2], q2,  q3  @ d26 = t5a,  d22 = 
t6a
-mbutterfly  d17, d31, d1[3], d2[0], q2,  q3  @ d17 = t8a,  d31 = 
t15a
-mbutterfly  d25, d23, d2[1], d2[2], q2,  q3  @ d25 = t9a,  d23 = 
t14a
-mbutterfly  d21, d27, d2[3], d3[0], q2,  q3  @ d21 = t10a, d27 = 
t13a
-mbutterfly  d29, d19, d3[1], d3[2], q2,  q3  @ d29 = t11a, d19 = 
t12a
+mbutterfly  d20, d28, d0[2], d0[3], q2,  q3  @ d20 = t2a,  d28 = 
t3a
+mbutterfly  d18, d30, d1[0], d1[1], q2,  q3  @ d18 = t4a,  

[FFmpeg-cvslog] arm: vp9itxfm: Avoid reloading the idct32 coefficients

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Mon Jan  2 
22:50:38 2017 +0200| [402546a17233a8815307df9e14ff88cd70424537] | committer: 
Martin Storsjö

arm: vp9itxfm: Avoid reloading the idct32 coefficients

The idct32x32 function actually pushed q4-q7 onto the stack even
though it didn't clobber them; there are plenty of registers that
can be used to allow keeping all the idct coefficients in registers
without having to reload different subsets of them at different
stages in the transform.

Since the idct16 core transform avoids clobbering q4-q7 (but clobbers
q2-q3 instead, to avoid needing to back up and restore q4-q7 at all
in the idct16 function), and the lanewise vmul needs a register in
the q0-q3 range, we move the stored coefficients from q2-q3 into q4-q5
while doing idct16.

While keeping these coefficients in registers, we still can skip pushing
q7.

Before:  Cortex A7   A8   A9  A53
vp9_inv_dct_dct_32x32_sub32_add_neon:  18553.8  17182.7  14303.3  12089.7
After:
vp9_inv_dct_dct_32x32_sub32_add_neon:  18470.3  16717.7  14173.6  11860.8

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=402546a17233a8815307df9e14ff88cd70424537
---

 libavcodec/arm/vp9itxfm_neon.S | 246 -
 1 file changed, 120 insertions(+), 126 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index 8dc4bbfa55..bed502eba2 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -1185,58 +1185,51 @@ function idct32x32_dc_add_neon
 endfunc
 
 .macro idct32_end
-butterfly   d16, d5,  d4,  d5  @ d16 = t16a, d5  = t19a
+butterfly   d16, d9,  d8,  d9  @ d16 = t16a, d9  = t19a
 butterfly   d17, d20, d23, d20 @ d17 = t17,  d20 = t18
-butterfly   d18, d6,  d7,  d6  @ d18 = t23a, d6  = t20a
+butterfly   d18, d10, d11, d10 @ d18 = t23a, d10 = t20a
 butterfly   d19, d21, d22, d21 @ d19 = t22,  d21 = t21
-butterfly   d4,  d28, d28, d30 @ d4  = t24a, d28 = t27a
+butterfly   d8,  d28, d28, d30 @ d8  = t24a, d28 = t27a
 butterfly   d23, d26, d25, d26 @ d23 = t25,  d26 = t26
-butterfly   d7,  d29, d29, d31 @ d7  = t31a, d29 = t28a
+butterfly   d11, d29, d29, d31 @ d11 = t31a, d29 = t28a
 butterfly   d22, d27, d24, d27 @ d22 = t30,  d27 = t29
 
 mbutterfly  d27, d20, d0[1], d0[2], q12, q15@ d27 = t18a, 
d20 = t29a
-mbutterfly  d29, d5,  d0[1], d0[2], q12, q15@ d29 = t19,  
d5  = t28
-mbutterfly  d28, d6,  d0[1], d0[2], q12, q15, neg=1 @ d28 = t27,  
d6  = t20
+mbutterfly  d29, d9,  d0[1], d0[2], q12, q15@ d29 = t19,  
d9  = t28
+mbutterfly  d28, d10, d0[1], d0[2], q12, q15, neg=1 @ d28 = t27,  
d10 = t20
 mbutterfly  d26, d21, d0[1], d0[2], q12, q15, neg=1 @ d26 = t26a, 
d21 = t21a
 
-butterfly   d31, d24, d7,  d4  @ d31 = t31,  d24 = t24
+butterfly   d31, d24, d11, d8  @ d31 = t31,  d24 = t24
 butterfly   d30, d25, d22, d23 @ d30 = t30a, d25 = t25a
 butterfly_r d23, d16, d16, d18 @ d23 = t23,  d16 = t16
 butterfly_r d22, d17, d17, d19 @ d22 = t22a, d17 = t17a
 butterfly   d18, d21, d27, d21 @ d18 = t18,  d21 = t21
-butterfly_r d27, d28, d5,  d28 @ d27 = t27a, d28 = t28a
-butterfly   d4,  d26, d20, d26 @ d4  = t29,  d26 = t26
-butterfly   d19, d20, d29, d6  @ d19 = t19a, d20 = t20
-vmov            d29, d4            @ d29 = t29
-
-mbutterfly0 d27, d20, d27, d20, d4, d6, q2, q3 @ d27 = t27,  d20 = 
t20
-mbutterfly0 d26, d21, d26, d21, d4, d6, q2, q3 @ d26 = t26a, d21 = 
t21a
-mbutterfly0 d25, d22, d25, d22, d4, d6, q2, q3 @ d25 = t25,  d22 = 
t22
-mbutterfly0 d24, d23, d24, d23, d4, d6, q2, q3 @ d24 = t24a, d23 = 
t23a
+butterfly_r d27, d28, d9,  d28 @ d27 = t27a, d28 = t28a
+butterfly   d8,  d26, d20, d26 @ d8  = t29,  d26 = t26
+butterfly   d19, d20, d29, d10 @ d19 = t19a, d20 = t20
+vmov            d29, d8            @ d29 = t29
+
+mbutterfly0 d27, d20, d27, d20, d8, d10, q4, q5 @ d27 = t27,  d20 
= t20
+mbutterfly0 d26, d21, d26, d21, d8, d10, q4, q5 @ d26 = t26a, d21 
= t21a
+mbutterfly0 d25, d22, d25, d22, d8, d10, q4, q5 @ d25 = t25,  d22 
= t22
+mbutterfly0 d24, d23, d24, d23, d8, d10, q4, q5 @ d24 = t24a, d23 
= t23a
 bx  lr
 .endm
 
 function idct32_odd
-movrel  r12, idct_coeffs
-add r12, r12, #32
-vld1.16 {q0-q1}, [r12,:128]
-
-mbutterfly  d16, d31, d0[0], d0[1], q2, q3 @ d16 = t16a, d31 = t31a
-mbutterfly  d24, d23, d0[2], d0[3], q2, q3 @ 

[FFmpeg-cvslog] aarch64: vp9itxfm: Avoid reloading the idct32 coefficients

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Mon Jan  2 
22:08:41 2017 +0200| [65aa002d54433154a6924dc13e498bec98451ad0] | committer: 
Martin Storsjö

aarch64: vp9itxfm: Avoid reloading the idct32 coefficients

The idct32x32 function actually pushed d8-d15 onto the stack even
though it didn't clobber them; there are plenty of registers that
can be used to allow keeping all the idct coefficients in registers
without having to reload different subsets of them at different
stages in the transform.

After this, we still can skip pushing d12-d15.

Before:
vp9_inv_dct_dct_32x32_sub32_add_neon: 8128.3
After:
vp9_inv_dct_dct_32x32_sub32_add_neon: 8053.3

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=65aa002d54433154a6924dc13e498bec98451ad0
---

 libavcodec/aarch64/vp9itxfm_neon.S | 110 +++--
 1 file changed, 43 insertions(+), 67 deletions(-)

diff --git a/libavcodec/aarch64/vp9itxfm_neon.S 
b/libavcodec/aarch64/vp9itxfm_neon.S
index d35f103a79..b6c2575236 100644
--- a/libavcodec/aarch64/vp9itxfm_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_neon.S
@@ -1123,18 +1123,14 @@ endfunc
 .endm
 
 function idct32_odd
-ld1 {v0.8h,v1.8h}, [x11]
-
-dmbutterfly v16, v31, v0.h[0], v0.h[1], v4, v5, v6, v7 // v16 = 
t16a, v31 = t31a
-dmbutterfly v24, v23, v0.h[2], v0.h[3], v4, v5, v6, v7 // v24 = 
t17a, v23 = t30a
-dmbutterfly v20, v27, v0.h[4], v0.h[5], v4, v5, v6, v7 // v20 = 
t18a, v27 = t29a
-dmbutterfly v28, v19, v0.h[6], v0.h[7], v4, v5, v6, v7 // v28 = 
t19a, v19 = t28a
-dmbutterfly v18, v29, v1.h[0], v1.h[1], v4, v5, v6, v7 // v18 = 
t20a, v29 = t27a
-dmbutterfly v26, v21, v1.h[2], v1.h[3], v4, v5, v6, v7 // v26 = 
t21a, v21 = t26a
-dmbutterfly v22, v25, v1.h[4], v1.h[5], v4, v5, v6, v7 // v22 = 
t22a, v25 = t25a
-dmbutterfly v30, v17, v1.h[6], v1.h[7], v4, v5, v6, v7 // v30 = 
t23a, v17 = t24a
-
-ld1 {v0.8h}, [x10]
+dmbutterfly v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = 
t16a, v31 = t31a
+dmbutterfly v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = 
t17a, v23 = t30a
+dmbutterfly v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = 
t18a, v27 = t29a
+dmbutterfly v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = 
t19a, v19 = t28a
+dmbutterfly v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = 
t20a, v29 = t27a
+dmbutterfly v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = 
t21a, v21 = t26a
+dmbutterfly v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = 
t22a, v25 = t25a
+dmbutterfly v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = 
t23a, v17 = t24a
 
 butterfly_8hv4,  v24, v16, v24 // v4  = t16, v24 = t17
 butterfly_8hv5,  v20, v28, v20 // v5  = t19, v20 = t18
@@ -1153,18 +1149,14 @@ function idct32_odd
 endfunc
 
 function idct32_odd_half
-ld1 {v0.8h,v1.8h}, [x11]
-
-dmbutterfly_h1  v16, v31, v0.h[0], v0.h[1], v4, v5, v6, v7 // v16 = 
t16a, v31 = t31a
-dmbutterfly_h2  v24, v23, v0.h[2], v0.h[3], v4, v5, v6, v7 // v24 = 
t17a, v23 = t30a
-dmbutterfly_h1  v20, v27, v0.h[4], v0.h[5], v4, v5, v6, v7 // v20 = 
t18a, v27 = t29a
-dmbutterfly_h2  v28, v19, v0.h[6], v0.h[7], v4, v5, v6, v7 // v28 = 
t19a, v19 = t28a
-dmbutterfly_h1  v18, v29, v1.h[0], v1.h[1], v4, v5, v6, v7 // v18 = 
t20a, v29 = t27a
-dmbutterfly_h2  v26, v21, v1.h[2], v1.h[3], v4, v5, v6, v7 // v26 = 
t21a, v21 = t26a
-dmbutterfly_h1  v22, v25, v1.h[4], v1.h[5], v4, v5, v6, v7 // v22 = 
t22a, v25 = t25a
-dmbutterfly_h2  v30, v17, v1.h[6], v1.h[7], v4, v5, v6, v7 // v30 = 
t23a, v17 = t24a
-
-ld1 {v0.8h}, [x10]
+dmbutterfly_h1  v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = 
t16a, v31 = t31a
+dmbutterfly_h2  v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = 
t17a, v23 = t30a
+dmbutterfly_h1  v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = 
t18a, v27 = t29a
+dmbutterfly_h2  v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = 
t19a, v19 = t28a
+dmbutterfly_h1  v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = 
t20a, v29 = t27a
+dmbutterfly_h2  v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = 
t21a, v21 = t26a
+dmbutterfly_h1  v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = 
t22a, v25 = t25a
+dmbutterfly_h2  v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = 
t23a, v17 = t24a
 
 butterfly_8hv4,  v24, v16, v24 // v4  = t16, v24 = t17
 butterfly_8hv5,  v20, v28, v20 // v5  = t19, v20 = t18
@@ -1183,18 +1175,14 @@ function idct32_odd_half
 endfunc
 
 function idct32_odd_quarter
-ld1 {v0.8h,v1.8h}, [x11]
-
-dsmull_hv4,  v5,  v16, v0.h[0]
-

[FFmpeg-cvslog] arm: vp9lpf: Implement the mix2_44 function with one single filter pass

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Sat Jan 14 
13:22:30 2017 +0200| [575e31e931e4178e9f1e24407503c9b4ec0ef9ba] | committer: 
Martin Storsjö

arm: vp9lpf: Implement the mix2_44 function with one single filter pass

For this case, with 8 inputs but only changing 4 of them, we can fit
all 16 input pixels into a q register, and still have enough temporary
registers for doing the loop filter.

The wd=8 filters would require too many temporary registers for
processing all 16 pixels at once though.

Before:  Cortex A7  A8 A9 A53
vp9_loop_filter_mix2_v_44_16_neon:   289.7   256.2  237.5   181.2
After:
vp9_loop_filter_mix2_v_44_16_neon:   221.2   150.5  177.7   138.0

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=575e31e931e4178e9f1e24407503c9b4ec0ef9ba
---

 libavcodec/arm/vp9dsp_init_arm.c |   7 +-
 libavcodec/arm/vp9lpf_neon.S | 191 +++
 2 files changed, 195 insertions(+), 3 deletions(-)

diff --git a/libavcodec/arm/vp9dsp_init_arm.c b/libavcodec/arm/vp9dsp_init_arm.c
index e99d931674..1ede1708e7 100644
--- a/libavcodec/arm/vp9dsp_init_arm.c
+++ b/libavcodec/arm/vp9dsp_init_arm.c
@@ -194,6 +194,8 @@ define_loop_filters(8, 8);
 define_loop_filters(16, 8);
 define_loop_filters(16, 16);
 
+define_loop_filters(44, 16);
+
 #define lf_mix_fn(dir, wd1, wd2, stridea)  
   \
 static void loop_filter_##dir##_##wd1##wd2##_16_neon(uint8_t *dst, 
   \
  ptrdiff_t stride, 
   \
@@ -207,7 +209,6 @@ static void 
loop_filter_##dir##_##wd1##wd2##_16_neon(uint8_t *dst,
 lf_mix_fn(h, wd1, wd2, stride) \
 lf_mix_fn(v, wd1, wd2, sizeof(uint8_t))
 
-lf_mix_fns(4, 4)
 lf_mix_fns(4, 8)
 lf_mix_fns(8, 4)
 lf_mix_fns(8, 8)
@@ -227,8 +228,8 @@ static av_cold void 
vp9dsp_loopfilter_init_arm(VP9DSPContext *dsp)
 dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_neon;
 dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_neon;
 
-dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_neon;
-dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_neon;
+dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_neon;
+dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_neon;
 dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_neon;
 dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_neon;
 dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_neon;
diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S
index e31c807cc0..12984a900c 100644
--- a/libavcodec/arm/vp9lpf_neon.S
+++ b/libavcodec/arm/vp9lpf_neon.S
@@ -44,6 +44,109 @@
 vtrn.8  \r2,  \r3
 .endm
 
+@ The input to and output from this macro is in the registers q8-q15,
+@ and q0-q7 are used as scratch registers.
+@ p3 = q8, p0 = q11, q0 = q12, q3 = q15
+.macro loop_filter_q
+vdup.u8 d0,  r2  @ E
+lsr r2,  r2,  #8
+vdup.u8 d2,  r3  @ I
+lsr r3,  r3,  #8
+vdup.u8 d1,  r2  @ E
+vdup.u8 d3,  r3  @ I
+
+vabd.u8 q2,  q8,  q9 @ abs(p3 - p2)
+vabd.u8 q3,  q9,  q10@ abs(p2 - p1)
+vabd.u8 q4,  q10, q11@ abs(p1 - p0)
+vabd.u8 q5,  q12, q13@ abs(q0 - q1)
+vabd.u8 q6,  q13, q14@ abs(q1 - q2)
+vabd.u8 q7,  q14, q15@ abs(q2 - q3)
+vmax.u8 q2,  q2,  q3
+vmax.u8 q3,  q4,  q5
+vmax.u8 q4,  q6,  q7
+vabd.u8 q5,  q11, q12@ abs(p0 - q0)
+vmax.u8 q2,  q2,  q3
+vqadd.u8q5,  q5,  q5 @ abs(p0 - q0) * 2
+vabd.u8 q7,  q10, q13@ abs(p1 - q1)
+vmax.u8 q2,  q2,  q4 @ max(abs(p3 - p2), ..., abs(q2 - q3))
+vshr.u8 q7,  q7,  #1
+vcle.u8 q2,  q2,  q1 @ max(abs()) <= I
+vqadd.u8q5,  q5,  q7 @ abs(p0 - q0) * 2 + abs(p1 - q1) >> 1
+vcle.u8 q5,  q5,  q0
+vandq2,  q2,  q5 @ fm
+
+vshrn.u16   d10, q2,  #4
+vmovr2,  r3,  d10
+orrsr2,  r2,  r3
+@ If no pixels need filtering, just exit as soon as possible
+beq 9f
+
+@ Calculate the normal inner loop filter for 2 or 4 pixels
+ldr r3,  [sp, #64]
+vabd.u8 q3,  q10, q11@ abs(p1 - p0)
+vabd.u8 q4,  q13, q12@ abs(q1 - q0)
+
+vsubl.u8q5,  d20, d26@ p1 - q1
+vsubl.u8q6,  d21, d27@ p1 - q1
+vmax.u8 q3,  q3,  q4 @ max(abs(p1 - p0), abs(q1 - q0))
+

[FFmpeg-cvslog] arm/aarch64: vp9lpf: Keep the comparison to E within 8 bit

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Sat Jan 14 
20:49:19 2017 +0200| [c582cb8537367721bb399a5d01b652c20142b756] | committer: 
Martin Storsjö

arm/aarch64: vp9lpf: Keep the comparison to E within 8 bit

The theoretical maximum value of E is 193, so we can just
saturate the addition to 255.

Before:                         Cortex A7      A8      A9     A53  A53/AArch64
vp9_loop_filter_v_4_8_neon:         143.0   127.7   114.8    88.0         87.7
vp9_loop_filter_v_8_8_neon:         241.0   197.2   173.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:        497.0   419.5   379.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:       965.2   818.7   731.4   579.0        452.0
After:
vp9_loop_filter_v_4_8_neon:         136.0   125.7   112.6    84.0         83.0
vp9_loop_filter_v_8_8_neon:         234.0   195.5   171.5   136.0        133.7
vp9_loop_filter_v_16_8_neon:        490.0   417.5   377.7   289.0        271.0
vp9_loop_filter_v_16_16_neon:       951.2   814.7   732.3   571.0        446.7

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c582cb8537367721bb399a5d01b652c20142b756
---

 libavcodec/aarch64/vp9lpf_neon.S | 40 +---
 libavcodec/arm/vp9lpf_neon.S | 11 +--
 2 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index 5fafc7ad5c..48cac4cac6 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -51,13 +51,6 @@
 // see the arm version instead.
 
 
-.macro uabdl_sz dst1, dst2, in1, in2, sz
-uabdl   \dst1,  \in1\().8b,  \in2\().8b
-.ifc \sz, .16b
-uabdl2  \dst2,  \in1\().16b, \in2\().16b
-.endif
-.endm
-
 .macro add_sz dst1, dst2, in1, in2, in3, in4, sz
 add \dst1,  \in1,  \in3
 .ifc \sz, .16b
@@ -86,20 +79,6 @@
 .endif
 .endm
 
-.macro cmhs_sz dst1, dst2, in1, in2, in3, in4, sz
-cmhs\dst1,  \in1,  \in3
-.ifc \sz, .16b
-cmhs\dst2,  \in2,  \in4
-.endif
-.endm
-
-.macro xtn_sz dst, in1, in2, sz
-xtn \dst\().8b,  \in1
-.ifc \sz, .16b
-xtn2\dst\().16b, \in2
-.endif
-.endm
-
 .macro usubl_sz dst1, dst2, in1, in2, sz
 usubl   \dst1,  \in1\().8b,  \in2\().8b
 .ifc \sz, .16b
@@ -179,20 +158,20 @@
 // tmpq2 == tmp3 + tmp4, etc.
 .macro loop_filter wd, sz, mix, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8
 .if \mix == 0
-dup v0.8h,  w2// E
-dup v1.8h,  w2// E
+dup v0\sz,  w2// E
 dup v2\sz,  w3// I
 dup v3\sz,  w4// H
 .else
-dup v0.8h,  w2// E
+dup v0.8b,  w2// E
 dup v2.8b,  w3// I
 dup v3.8b,  w4// H
+lsr w5, w2,  #8
 lsr w6, w3,  #8
 lsr w7, w4,  #8
-ushrv1.8h,  v0.8h, #8 // E
+dup v1.8b,  w5// E
 dup v4.8b,  w6// I
-bic v0.8h,  #255, lsl 8 // E
 dup v5.8b,  w7// H
+trn1v0.2d,  v0.2d,  v1.2d
 trn1v2.2d,  v2.2d,  v4.2d
 trn1v3.2d,  v3.2d,  v5.2d
 .endif
@@ -206,16 +185,15 @@
 umaxv4\sz,  v4\sz,  v5\sz
 umaxv5\sz,  v6\sz,  v7\sz
 umax\tmp1\sz, \tmp1\sz, \tmp2\sz
-uabdl_szv6.8h,  v7.8h,  v23, v24, \sz // abs(p0 - q0)
+uabdv6\sz,  v23\sz, v24\sz// abs(p0 - q0)
 umaxv4\sz,  v4\sz,  v5\sz
-add_sz  v6.8h,  v7.8h,  v6.8h,  v7.8h,  v6.8h,  v7.8h, \sz // 
abs(p0 - q0) * 2
+uqadd   v6\sz,  v6\sz,  v6\sz // abs(p0 - q0) * 2
 uabdv5\sz,  v22\sz, v25\sz// abs(p1 - q1)
 umaxv4\sz,  v4\sz,  \tmp1\sz  // max(abs(p3 - p2), 
..., abs(q2 - q3))
 ushrv5\sz,  v5\sz,  #1
 cmhsv4\sz,  v2\sz,  v4\sz // max(abs()) <= I
-uaddw_szv6.8h,  v7.8h,  v6.8h,  v7.8h,  v5, \sz // abs(p0 - 
q0) * 2 + abs(p1 - q1) >> 1
-cmhs_sz v6.8h,  v7.8h,  v0.8h,  v1.8h,  v6.8h,  v7.8h, \sz
-xtn_sz  v5, v6.8h,  v7.8h,  \sz
+uqadd   v6\sz,  v6\sz,  v5\sz // abs(p0 - q0) * 2 + 
abs(p1 - q1) >> 1
+cmhsv5\sz,  v0\sz,  v6\sz
 and v4\sz,  v4\sz,  v5\sz // fm
 
 // If no pixels need filtering, just exit as soon as possible
diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S
index 1e161e0c63..e31c807cc0 100644
--- a/libavcodec/arm/vp9lpf_neon.S
+++ b/libavcodec/arm/vp9lpf_neon.S
@@ -51,7 +51,7 @@
 @ 

[FFmpeg-cvslog] aarch64: vp9lpf: Use dup+rev16+uzp1 instead of dup+lsr+dup+trn1

2017-10-02 Thread Martin Storsjö
ffmpeg | branch: master | Martin Storsjö  | Thu Feb 23 
23:33:58 2017 +0200| [3bf9c48320f25f3d5557485b0202f22ae60748b0] | committer: 
Martin Storsjö

aarch64: vp9lpf: Use dup+rev16+uzp1 instead of dup+lsr+dup+trn1

This is one cycle faster in total, and three instructions fewer.

Before:
vp9_loop_filter_mix2_v_44_16_neon: 123.2
After:
vp9_loop_filter_mix2_v_44_16_neon: 122.2

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3bf9c48320f25f3d5557485b0202f22ae60748b0
---

 libavcodec/aarch64/vp9lpf_neon.S | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index 48cac4cac6..e9c497096b 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -162,18 +162,15 @@
 dup v2\sz,  w3// I
 dup v3\sz,  w4// H
 .else
-dup v0.8b,  w2// E
-dup v2.8b,  w3// I
-dup v3.8b,  w4// H
-lsr w5, w2,  #8
-lsr w6, w3,  #8
-lsr w7, w4,  #8
-dup v1.8b,  w5// E
-dup v4.8b,  w6// I
-dup v5.8b,  w7// H
-trn1v0.2d,  v0.2d,  v1.2d
-trn1v2.2d,  v2.2d,  v4.2d
-trn1v3.2d,  v3.2d,  v5.2d
+dup v0.8h,  w2// E
+dup v2.8h,  w3// I
+dup v3.8h,  w4// H
+rev16   v1.16b, v0.16b// E
+rev16   v4.16b, v2.16b// I
+rev16   v5.16b, v3.16b// H
+uzp1v0.16b, v0.16b, v1.16b
+uzp1v2.16b, v2.16b, v4.16b
+uzp1v3.16b, v3.16b, v5.16b
 .endif
 
 uabdv4\sz,  v20\sz, v21\sz// abs(p3 - p2)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Place attribute_deprecated in the right position for struct declarations

2017-10-02 Thread Diego Biurrun
ffmpeg | branch: master | Diego Biurrun  | Wed Feb 22 
11:39:21 2017 +0100| [ed6a891c364f8b0850b557d9578b8920cc15a937] | committer: 
Diego Biurrun

Place attribute_deprecated in the right position for struct declarations

libavcodec/vaapi.h:58:1: warning: attribute 'deprecated' is ignored, place it 
after "struct" to apply attribute to type declaration [-Wignored-attributes]

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ed6a891c364f8b0850b557d9578b8920cc15a937
---

 libavcodec/vaapi.h | 3 +--
 libavcodec/xvmc.h  | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h
index ceb7904bea..391368c85f 100644
--- a/libavcodec/vaapi.h
+++ b/libavcodec/vaapi.h
@@ -55,8 +55,7 @@
  *
  * Deprecated: use AVCodecContext.hw_frames_ctx instead.
  */
-attribute_deprecated
-struct vaapi_context {
+struct attribute_deprecated vaapi_context {
 /**
  * Window system dependent data
  *
diff --git a/libavcodec/xvmc.h b/libavcodec/xvmc.h
index 950ed18276..91027b9c2f 100644
--- a/libavcodec/xvmc.h
+++ b/libavcodec/xvmc.h
@@ -45,7 +45,7 @@
 #define AV_XVMC_ID0x1DC711C0  /**< special value to ensure 
that regular pixel routines haven't corrupted the struct
the number is 1337 
speak for the letters IDCT MCo (motion compensation) */
 
-attribute_deprecated struct xvmc_pix_fmt {
+struct attribute_deprecated xvmc_pix_fmt {
 /** The field contains the special constant value AV_XVMC_ID.
 It is used as a test that the application correctly uses the API,
 and that there is no corruption caused by pixel routines.

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Merge commit 'ed6a891c364f8b0850b557d9578b8920cc15a937'

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 16:05:36 
2017 -0300| [f40cd7899188cae676ec01ee817ec764381c8403] | committer: James Almer

Merge commit 'ed6a891c364f8b0850b557d9578b8920cc15a937'

* commit 'ed6a891c364f8b0850b557d9578b8920cc15a937':
  Place attribute_deprecated in the right position for struct declarations

This commit is a noop, see
99530387283fc58e6c3ac42724955b8569daf548
6ff3da4f6a8e460d77bb65fed4267bf48f513fcf

Merged-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f40cd7899188cae676ec01ee817ec764381c8403
---



___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Merge commit '04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8'

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 16:00:14 
2017 -0300| [0451c3db4ee21afb437279b656c83269677801e5] | committer: James Almer

Merge commit '04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8'

* commit '04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8':
  mkv: Update the seek test to match 5d3953a5dc
  fate: Update fate-lavf-mkv after commit 5d3953a5dc

This commit is a noop.

Merged-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0451c3db4ee21afb437279b656c83269677801e5
---



___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] mkv: Update the seek test to match 5d3953a5dc

2017-10-02 Thread Luca Barbato
ffmpeg | branch: master | Luca Barbato  | Wed Feb 22 
09:55:45 2017 +0100| [04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8] | committer: 
Luca Barbato

mkv: Update the seek test to match 5d3953a5dc

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=04d2afa93b6c6f320ac45dd99ce1226f3c3d5ac8
---

 tests/ref/seek/lavf-mkv | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/ref/seek/lavf-mkv b/tests/ref/seek/lavf-mkv
index 530c8dfe4d..8267a2a255 100644
--- a/tests/ref/seek/lavf-mkv
+++ b/tests/ref/seek/lavf-mkv
@@ -1,53 +1,53 @@
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:633 size:   
208
 ret: 0 st:-1 flags:0  ts:-1.000000
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 
27837
 ret: 0 st:-1 flags:1  ts: 1.894167
 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 
27834
 ret: 0 st: 0 flags:0  ts: 0.788000
 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 
27834
 ret: 0 st: 0 flags:1  ts:-0.317000
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 
27837
 ret: 0 st: 1 flags:0  ts: 2.577000
 ret:-EOF
 ret: 0 st: 1 flags:1  ts: 1.471000
-ret: 0 st: 1 flags:1 dts: 0.982000 pts: 0.982000 pos: 320112 size:   
209
+ret: 0 st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320112 size:   
209
 ret: 0 st:-1 flags:0  ts: 0.365002
 ret: 0 st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146824 size: 
27925
 ret: 0 st:-1 flags:1  ts:-0.740831
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 
27837
 ret: 0 st: 0 flags:0  ts: 2.153000
 ret:-EOF
 ret: 0 st: 0 flags:1  ts: 1.048000
 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 
27834
 ret: 0 st: 1 flags:0  ts:-0.058000
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:633 size:   
208
 ret: 0 st: 1 flags:1  ts: 2.836000
-ret: 0 st: 1 flags:1 dts: 0.982000 pts: 0.982000 pos: 320112 size:   
209
+ret: 0 st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320112 size:   
209
 ret: 0 st:-1 flags:0  ts: 1.730004
 ret:-EOF
 ret: 0 st:-1 flags:1  ts: 0.624171
 ret: 0 st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146824 size: 
27925
 ret: 0 st: 0 flags:0  ts:-0.482000
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 
27837
 ret: 0 st: 0 flags:1  ts: 2.413000
 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 
27834
 ret: 0 st: 1 flags:0  ts: 1.307000
 ret:-EOF
 ret: 0 st: 1 flags:1  ts: 0.201000
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 1 flags:1 dts: 0.183000 pts: 0.183000 pos:  72204 size:   
209
 ret: 0 st:-1 flags:0  ts:-0.904994
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 
27837
 ret: 0 st:-1 flags:1  ts: 1.989173
 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 
27834
 ret: 0 st: 0 flags:0  ts: 0.883000
 ret: 0 st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292271 size: 
27834
 ret: 0 st: 0 flags:1  ts:-0.222000
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 
27837
 ret: 0 st: 1 flags:0  ts: 2.672000
 ret:-EOF
 ret: 0 st: 1 flags:1  ts: 1.566000
-ret: 0 st: 1 flags:1 dts: 0.982000 pts: 0.982000 pos: 320112 size:   
209
+ret: 0 st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320112 size:   
209
 ret: 0 st:-1 flags:0  ts: 0.460008
 ret: 0 st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146824 size: 
27925
 ret: 0 st:-1 flags:1  ts:-0.645825
-ret: 0 st: 1 flags:1 dts:-0.011000 pts:-0.011000 pos:633 size:   
208
+ret: 0 st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:849 size: 
27837

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: Update fate-lavf-mkv after commit 5d3953a5dc

2017-10-02 Thread John Stebbins
ffmpeg | branch: master | John Stebbins  | Tue Feb 21 
16:47:20 2017 -0700| [fec3456ce188e895a2082fc1fb298570fc29ad29] | committer: 
John Stebbins

fate: Update fate-lavf-mkv after commit 5d3953a5dc

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fec3456ce188e895a2082fc1fb298570fc29ad29
---

 tests/ref/lavf/mkv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ref/lavf/mkv b/tests/ref/lavf/mkv
index db0aba0125..04c5f3cac2 100644
--- a/tests/ref/lavf/mkv
+++ b/tests/ref/lavf/mkv
@@ -1,3 +1,3 @@
-76d400179dfd6143f50ea4d19fe8ed99 *./tests/data/lavf/lavf.mkv
+dad336329ef85127f97e9d12a3b57a59 *./tests/data/lavf/lavf.mkv
 320383 ./tests/data/lavf/lavf.mkv
-./tests/data/lavf/lavf.mkv CRC=0x36193cda
+./tests/data/lavf/lavf.mkv CRC=0x63ed3cda

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Merge commit '156bc0193bd47d3f4b3adaa93be0e206e12686ab'

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 15:52:18 
2017 -0300| [ff29db4ebf2245a33e184d79ebb294c71d42ea1f] | committer: James Almer

Merge commit '156bc0193bd47d3f4b3adaa93be0e206e12686ab'

* commit '156bc0193bd47d3f4b3adaa93be0e206e12686ab':
  fate: Add webp alpha test

This commit is a noop, see
bd2cec7021a0e275d4551a56d0a220ba2f9bd691
2bdb26b4eeec8142c927645a10b7f55cae5cdcc5
92bf87db294c0544168f572ba6739db7d2ba2ba3

Merged-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ff29db4ebf2245a33e184d79ebb294c71d42ea1f
---



___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] fate: Add webp alpha test

2017-10-02 Thread Mark Thompson
ffmpeg | branch: master | Mark Thompson  | Fri Feb 17 23:13:14 
2017 +0000| [156bc0193bd47d3f4b3adaa93be0e206e12686ab] | committer: Mark 
Thompson

fate: Add webp alpha test

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=156bc0193bd47d3f4b3adaa93be0e206e12686ab
---

 tests/fate/image.mak  | 11 +--
 tests/ref/fate/{webp => webp-yuv420p} |  0
 tests/ref/fate/webp-yuva420p  |  2 ++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 83fa71a199..7d970312d4 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -223,8 +223,15 @@ FATE_TIFF-$(call DEMDEC, IMAGE2, TIFF) += $(FATE_TIFF)
 FATE_SAMPLES_AVCONV += $(FATE_TIFF-yes)
 fate-tiff: $(FATE_TIFF-yes)
 
-FATE_SAMPLES_AVCONV-$(call DEMDEC, IMAGE2, WEBP) += fate-webp
-fate-webp: CMD = framecrc -i $(TARGET_SAMPLES)/webp/image_small.webp
+FATE_WEBP += fate-webp-yuv420p
+fate-webp-yuv420p: CMD = framecrc -i $(TARGET_SAMPLES)/webp/image_small.webp
+
+FATE_WEBP += fate-webp-yuva420p
+fate-webp-yuva420p: CMD = framecrc -i $(TARGET_SAMPLES)/webp/1_webp_a.webp
+
+FATE_WEBP-$(call DEMDEC, IMAGE2, WEBP) += $(FATE_WEBP)
+FATE_SAMPLES_AVCONV += $(FATE_WEBP-yes)
+fate-webp: $(FATE_WEBP-yes)
 
 FATE_XBM += fate-xbm10
 fate-xbm10: CMD = framecrc -i $(TARGET_SAMPLES)/xbm/xl.xbm
diff --git a/tests/ref/fate/webp b/tests/ref/fate/webp-yuv420p
similarity index 100%
rename from tests/ref/fate/webp
rename to tests/ref/fate/webp-yuv420p
diff --git a/tests/ref/fate/webp-yuva420p b/tests/ref/fate/webp-yuva420p
new file mode 100644
index 0000000000..3350edf2d7
--- /dev/null
+++ b/tests/ref/fate/webp-yuva420p
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,  0,  0,1,   301200, 0x12b071a0

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] matroskaenc: factor ts_offset into block timecode computation

2017-10-02 Thread John Stebbins
ffmpeg | branch: master | John Stebbins  | Wed Feb 15 
15:22:40 2017 -0700| [5d3953a5dcfd5f71391b7f34908517eb6f7e5146] | committer: 
John Stebbins

matroskaenc: factor ts_offset into block timecode computation

ts_offset was added to cluster timecode, but then effectively subtracted
back off the block timecode

When setting initial_padding for an audio stream, the timestamps are
written incorrectly to the mkv file.  cluster timecode gets written
as pts0 + ts_offset which is correct, but then block timecode gets
written as pts - cluster timecode which expanded is
pts - (pts0 + ts_offset).  Adding cluster and block tc back together:
cluster + block = (pts0 + ts_offset) + (pts - (pts0 + ts_offset)) = pts
But the result should be pts + ts_offset since demux will subtract the
CodecDelay element from pts and set initial_padding to CodecDelay.
This patch gives the correct result.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5d3953a5dcfd5f71391b7f34908517eb6f7e5146
---

 libavformat/matroskaenc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index e951a0fb67..2fe6e0ed49 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -1461,6 +1461,7 @@ static void mkv_write_block(AVFormatContext *s, 
AVIOContext *pb,
 uint8_t *data = NULL;
 int offset = 0, size = pkt->size;
 int64_t ts = mkv->tracks[pkt->stream_index].write_dts ? pkt->dts : 
pkt->pts;
+ts += mkv->tracks[pkt->stream_index].ts_offset;
 
 av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, "
"pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", flags 
%d\n",

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Merge commit '5d3953a5dcfd5f71391b7f34908517eb6f7e5146'

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 15:26:56 
2017 -0300| [45121cbdda00627c9b51b3fd10ea044ebfaa6664] | committer: James Almer

Merge commit '5d3953a5dcfd5f71391b7f34908517eb6f7e5146'

* commit '5d3953a5dcfd5f71391b7f34908517eb6f7e5146':
  matroskaenc: factor ts_offset into block timecode computation

Merged-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=45121cbdda00627c9b51b3fd10ea044ebfaa6664
---

 libavformat/matroskaenc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 17094f82b0..6f094c458c 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -2113,6 +2113,8 @@ static void mkv_write_block(AVFormatContext *s, 
AVIOContext *pb,
 uint8_t track_number = (mkv->is_dash ? mkv->dash_track_number : 
(pkt->stream_index + 1));
 ebml_master block_group, block_additions, block_more;
 
+ts += mkv->tracks[pkt->stream_index].ts_offset;
+
 av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, "
"pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", keyframe 
%d\n",
avio_tell(pb), pkt->size, pkt->pts, pkt->dts, pkt->duration,


==

diff --cc libavformat/matroskaenc.c
index 17094f82b0,2fe6e0ed49..6f094c458c
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@@ -2105,18 -1458,14 +2105,20 @@@ static void mkv_write_block(AVFormatCon
  {
  MatroskaMuxContext *mkv = s->priv_data;
  AVCodecParameters *par = s->streams[pkt->stream_index]->codecpar;
 -uint8_t *data = NULL;
 -int offset = 0, size = pkt->size;
 +uint8_t *data = NULL, *side_data = NULL;
 +int offset = 0, size = pkt->size, side_data_size = 0;
  int64_t ts = mkv->tracks[pkt->stream_index].write_dts ? pkt->dts : 
pkt->pts;
 +uint64_t additional_id = 0;
 +int64_t discard_padding = 0;
 +uint8_t track_number = (mkv->is_dash ? mkv->dash_track_number : 
(pkt->stream_index + 1));
 +ebml_master block_group, block_additions, block_more;
 +
+ ts += mkv->tracks[pkt->stream_index].ts_offset;
+ 
  av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, "
 -   "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", flags 
%d\n",
 -   avio_tell(pb), pkt->size, pkt->pts, pkt->dts, pkt->duration, 
flags);
 +   "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", keyframe 
%d\n",
 +   avio_tell(pb), pkt->size, pkt->pts, pkt->dts, pkt->duration,
 +   keyframe != 0);
  if (par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 &&
  (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))
  ff_avc_parse_nal_units_buf(pkt->data, &data, &size);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avdevice/decklink_dec: remove av_dup_packet() usage

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Mon Oct  2 13:08:39 
2017 -0300| [e91f0c4f8b3e81bc63838cc67370a7b13c8d9e78] | committer: James Almer

avdevice/decklink_dec: remove av_dup_packet() usage

Reviewed-by: Marton Balint 
Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e91f0c4f8b3e81bc63838cc67370a7b13c8d9e78
---

 libavdevice/decklink_dec.cpp | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp
index 9d12d0fed0..53ff576ec5 100644
--- a/libavdevice/decklink_dec.cpp
+++ b/libavdevice/decklink_dec.cpp
@@ -450,22 +450,24 @@ static unsigned long long 
avpacket_queue_size(AVPacketQueue *q)
 static int avpacket_queue_put(AVPacketQueue *q, AVPacket *pkt)
 {
 AVPacketList *pkt1;
+int ret;
 
 // Drop Packet if queue size is > maximum queue size
 if (avpacket_queue_size(q) > (uint64_t)q->max_q_size) {
 av_log(q->avctx, AV_LOG_WARNING,  "Decklink input buffer overrun!\n");
 return -1;
 }
-/* duplicate the packet */
-if (av_dup_packet(pkt) < 0) {
-return -1;
-}
 
-pkt1 = (AVPacketList *)av_malloc(sizeof(AVPacketList));
+pkt1 = (AVPacketList *)av_mallocz(sizeof(AVPacketList));
 if (!pkt1) {
 return -1;
 }
-pkt1->pkt  = *pkt;
+ret = av_packet_ref(&pkt1->pkt, pkt);
+av_packet_unref(pkt);
+if (ret < 0) {
+av_free(pkt1);
+return -1;
+}
 pkt1->next = NULL;
 
 pthread_mutex_lock(&q->mutex);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avdevice/decklink_dec: use av_packet_add_side_data()

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Sun Oct  1 23:31:12 
2017 -0300| [0c1ffd0aa55c6cef6dffe2b736786c6cb86d8a3d] | committer: James Almer

avdevice/decklink_dec: use av_packet_add_side_data()

It uses the existing buffer instead of allocating a new one.

Reviewed-by: Marton Balint 
Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0c1ffd0aa55c6cef6dffe2b736786c6cb86d8a3d
---

 libavdevice/decklink_dec.cpp | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp
index 8a14094474..9d12d0fed0 100644
--- a/libavdevice/decklink_dec.cpp
+++ b/libavdevice/decklink_dec.cpp
@@ -390,10 +390,8 @@ uint8_t *get_metadata(AVFormatContext *avctx, uint16_t 
*buf, size_t width,
 clear_parity_bits(buf, len);
 data = vanc_to_cc(avctx, buf, width, data_len);
 if (data) {
-uint8_t *pkt_cc = av_packet_new_side_data(pkt, 
AV_PKT_DATA_A53_CC, data_len);
-if (pkt_cc)
-memcpy(pkt_cc, data, data_len);
-av_free(data);
+if (av_packet_add_side_data(pkt, AV_PKT_DATA_A53_CC, data, 
data_len) < 0)
+av_free(data);
 }
 } else {
 av_log(avctx, AV_LOG_DEBUG, "Unknown meta data DID = 0x%.2x SDID = 
0x%.2x\n",

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/encode: remove usage of av_dup_packet()

2017-10-02 Thread James Almer
ffmpeg | branch: master | James Almer  | Tue Sep 26 00:24:29 
2017 -0300| [a22c6a4796ca1f2cbee6784262515da876fbec22] | committer: James Almer

avcodec/encode: remove usage of av_dup_packet()

Reviewed-by: wm4 
Signed-off-by: James Almer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a22c6a4796ca1f2cbee6784262515da876fbec22
---

 libavcodec/encode.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/libavcodec/encode.c b/libavcodec/encode.c
index 525ee1f5d6..dd50486bcf 100644
--- a/libavcodec/encode.c
+++ b/libavcodec/encode.c
@@ -222,10 +222,12 @@ int attribute_align_arg 
avcodec_encode_audio2(AVCodecContext *avctx,
 }
 avpkt->buf  = user_pkt.buf;
 avpkt->data = user_pkt.data;
-} else {
-if (av_dup_packet(avpkt) < 0) {
-ret = AVERROR(ENOMEM);
-}
+} else if (!avpkt->buf) {
+AVPacket tmp = { 0 };
+ret = av_packet_ref(&tmp, avpkt);
+if (ret < 0)
+return ret;
+*avpkt = tmp;
 }
 }
 
@@ -318,10 +320,12 @@ int attribute_align_arg 
avcodec_encode_video2(AVCodecContext *avctx,
 }
 avpkt->buf  = user_pkt.buf;
 avpkt->data = user_pkt.data;
-} else {
-if (av_dup_packet(avpkt) < 0) {
-ret = AVERROR(ENOMEM);
-}
+} else if (!avpkt->buf) {
+AVPacket tmp = { 0 };
+ret = av_packet_ref(&tmp, avpkt);
+if (ret < 0)
+return ret;
+*avpkt = tmp;
 }
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog