[libav-commits] h264_cabac: Fix CABAC+8x8dct in 4:4:4

2017-08-14 Thread Anton Mitrofanov
Module: libav
Branch: release/12
Commit: 5f13aa25667e148e1c3a78e55a3d1b11d56897c1

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Sean McGovern <gsean...@gmail.com>
Date:  Thu Jun 15 18:56:14 2017 -0400

h264_cabac: Fix CABAC+8x8dct in 4:4:4

Use the correct ctxIdxInc calculation for coded_block_flag.
Keep old behavior for old versions of x264 for backward compatibility.

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov <an...@khirnov.net>
(cherry picked from commit 18d3f36d3c4d0f2c3e702f970ff8b457d7d5e39c)

Signed-off-by: Sean McGovern <gsean...@gmail.com>

---

 libavcodec/h264_cabac.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 248c2d5..49a111b 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2329,21 +2329,40 @@ decode_intra_mb:
 if (CHROMA444(h) && IS_8x8DCT(mb_type)){
 int i;
 uint8_t *nnz_cache = sl->non_zero_count_cache;
-for (i = 0; i < 2; i++){
-if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
-nnz_cache[3+8* 1 + 2*8*i]=
-nnz_cache[3+8* 2 + 2*8*i]=
-nnz_cache[3+8* 6 + 2*8*i]=
-nnz_cache[3+8* 7 + 2*8*i]=
-nnz_cache[3+8*11 + 2*8*i]=
-nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+if (h->x264_build < 151U) {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
+}
+} else {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= !IS_INTRA_PCM(sl->left_type[LEFT(i)]) ? 0 : 64;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA_PCM(sl->top_type) ? 0 : 0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
-}
-if (sl->top_type && !IS_8x8DCT(sl->top_type)){
-uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
-AV_WN32A(&nnz_cache[4+8* 0], top_empty);
-AV_WN32A(&nnz_cache[4+8* 5], top_empty);
-AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
 }
 h->cur_pic.mb_type[mb_xy] = mb_type;

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264dec: Fix mix of lossless and lossy MBs decoding

2017-08-14 Thread Anton Mitrofanov
Module: libav
Branch: release/12
Commit: 2b70d2d5149a8b4c7b0416a0b9200a8f5c73a4fa

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Sean McGovern <gsean...@gmail.com>
Date:  Thu Jun 15 18:56:16 2017 -0400

h264dec: Fix mix of lossless and lossy MBs decoding

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov <an...@khirnov.net>
(cherry picked from commit 70946e605924e2108c39f96faa369c220177f301)

Signed-off-by: Sean McGovern <gsean...@gmail.com>

---

 libavcodec/h264_cabac.c | 16 
 libavcodec/h264_cavlc.c | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index ae1ef6b..248c2d5 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2352,14 +2352,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const uint32_t *qmul;
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 // decode_cabac_mb_dqp
 if(get_cabac_noinline( &sl->cabac, &sl->cabac_state[60 + (sl->last_qscale_diff != 0)])){
 int val = 1;
@@ -2390,6 +2382,14 @@ decode_intra_mb:
 }else
 sl->last_qscale_diff=0;
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 0);
 if (CHROMA444(h)) {
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 22a643b..3bd3c84 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -1093,14 +1093,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 dquant= get_se_golomb(&sl->gb);
 
 sl->qscale += dquant;
@@ -1117,6 +1109,14 @@ decode_intra_mb:
 sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
 sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
 return -1;
 }

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264_cabac: Fix CABAC+8x8dct in 4:4:4

2017-07-26 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 18d3f36d3c4d0f2c3e702f970ff8b457d7d5e39c

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Anton Khirnov <an...@khirnov.net>
Date:  Thu Jun 15 18:56:14 2017 -0400

h264_cabac: Fix CABAC+8x8dct in 4:4:4

Use the correct ctxIdxInc calculation for coded_block_flag.
Keep old behavior for old versions of x264 for backward compatibility.

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov <an...@khirnov.net>

---

 libavcodec/h264_cabac.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index b28e486..5dd285c 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2329,21 +2329,40 @@ decode_intra_mb:
 if (CHROMA444(h) && IS_8x8DCT(mb_type)){
 int i;
 uint8_t *nnz_cache = sl->non_zero_count_cache;
-for (i = 0; i < 2; i++){
-if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
-nnz_cache[3+8* 1 + 2*8*i]=
-nnz_cache[3+8* 2 + 2*8*i]=
-nnz_cache[3+8* 6 + 2*8*i]=
-nnz_cache[3+8* 7 + 2*8*i]=
-nnz_cache[3+8*11 + 2*8*i]=
-nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+if (h->x264_build < 151U) {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
+}
+} else {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= !IS_INTRA_PCM(sl->left_type[LEFT(i)]) ? 0 : 64;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA_PCM(sl->top_type) ? 0 : 0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
-}
-if (sl->top_type && !IS_8x8DCT(sl->top_type)){
-uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
-AV_WN32A(&nnz_cache[4+8* 0], top_empty);
-AV_WN32A(&nnz_cache[4+8* 5], top_empty);
-AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
 }
 h->cur_pic.mb_type[mb_xy] = mb_type;

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264dec: Fix mix of lossless and lossy MBs decoding

2017-07-26 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 70946e605924e2108c39f96faa369c220177f301

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Anton Khirnov <an...@khirnov.net>
Date:  Thu Jun 15 18:56:16 2017 -0400

h264dec: Fix mix of lossless and lossy MBs decoding

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov <an...@khirnov.net>

---

 libavcodec/h264_cabac.c | 16 
 libavcodec/h264_cavlc.c | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 5dd285c..c0b9e30 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2371,14 +2371,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const uint32_t *qmul;
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 // decode_cabac_mb_dqp
 if(get_cabac_noinline( &sl->cabac, &sl->cabac_state[60 + (sl->last_qscale_diff != 0)])){
 int val = 1;
@@ -2409,6 +2401,14 @@ decode_intra_mb:
 }else
 sl->last_qscale_diff=0;
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 0);
 if (CHROMA444(h)) {
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index c11e211..d57062b 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -1093,14 +1093,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 dquant= get_se_golomb(&sl->gb);
 
 sl->qscale += dquant;
@@ -1117,6 +1109,14 @@ decode_intra_mb:
 sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
 sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
 return -1;
 }

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264: Fix decoding delay for Intra only streams

2016-06-12 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: a833ff68f6bf9dc72c3ef0ddf830ebed743c0703

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Anton Khirnov <an...@khirnov.net>
Date:  Sun May  8 13:28:00 2016 +0200

h264: Fix decoding delay for Intra only streams

Signed-off-by: Anton Khirnov <an...@khirnov.net>

---

 libavcodec/h264_ps.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index f6cd1ca..8d567ca 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -530,7 +530,8 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx,
 
 /* if the maximum delay is not stored in the SPS, derive it based on the
  * level */
-if (!sps->bitstream_restriction_flag) {
+if (!sps->bitstream_restriction_flag &&
+(sps->ref_frame_count || avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT)) {
 sps->num_reorder_frames = MAX_DELAYED_PIC_COUNT - 1;
 for (i = 0; i < FF_ARRAY_ELEMS(level_max_dpb_mbs); i++) {
 if (level_max_dpb_mbs[i][0] == sps->level_idc) {

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits


[libav-commits] x86inc: Enable AVX emulation in additional cases

2016-05-16 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 2fb1d17a5a6b6ff8da2434cde0fda821f06f608c

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Anton Khirnov <an...@khirnov.net>
Date:  Wed Apr 20 19:35:34 2016 +0200

x86inc: Enable AVX emulation in additional cases

Allows emulation to work when dst is equal to src2 as long as the
instruction is commutative, e.g. `addps m0, m1, m0`.

Signed-off-by: Anton Khirnov <an...@khirnov.net>

---

 libavutil/x86/x86inc.asm |   21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b79cc19..dca1f78 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1129,14 +1129,12 @@ INIT_XMM
 %if __emulate_avx
 %xdefine __src1 %7
 %xdefine __src2 %8
-%ifnidn %6, %7
-%if %0 >= 9
-CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9
-%else
-CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8
-%endif
-%if %5 && %4 == 0
-%ifnnum sizeof%8
+%if %5 && %4 == 0
+%ifnidn %6, %7
+%ifidn %6, %8
+%xdefine __src1 %8
+%xdefine __src2 %7
+%elifnnum sizeof%8
 ; 3-operand AVX instructions with a memory arg can only have it in src2,
 ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
 ; So, if the instruction is commutative with a memory arg, swap them.
@@ -1144,6 +1142,13 @@ INIT_XMM
 %xdefine __src2 %7
 %endif
 %endif
+%endif
+%ifnidn %6, __src1
+%if %0 >= 9
+CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, __src2, %9
+%else
+CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, __src2
+%endif
 %if __sizeofreg == 8
 MOVQ %6, __src1
 %elif %3

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits


[libav-commits] x86inc: warn when instructions incompatible with current cpuflags are used

2015-08-11 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: b114d28a18050b5ebd22fc067332e5487243889c

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Anton Khirnov <an...@khirnov.net>
Date:  Sat Aug  1 17:27:33 2015 +0200

x86inc: warn when instructions incompatible with current cpuflags are used

Signed-off-by: Henrik Gramner <hen...@gramner.com>
Signed-off-by: Anton Khirnov <an...@khirnov.net>

---

 libavutil/x86/x86inc.asm |  587 +++---
 1 file changed, 299 insertions(+), 288 deletions(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index ae6813a..96ebe37 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1069,15 +1069,16 @@ INIT_XMM
 %endmacro
 
 ;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
-;%4 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
-;%5+: operands
-%macro RUN_AVX_INSTR 5-8+
-%ifnum sizeof%6
+;%2 == minimal instruction set
+;%3 == 1 if float, 0 if int
+;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
+;%6+: operands
+%macro RUN_AVX_INSTR 6-9+
+%ifnum sizeof%7
+%assign __sizeofreg sizeof%7
+%elifnum sizeof%6
 %assign __sizeofreg sizeof%6
-%elifnum sizeof%5
-%assign __sizeofreg sizeof%5
 %else
 %assign __sizeofreg mmsize
 %endif
@@ -1086,325 +1087,335 @@ INIT_XMM
 %xdefine __instr v%1
 %else
 %xdefine __instr %1
-%if %0 >= 7+%3
+%if %0 >= 8+%4
 %assign __emulate_avx 1
 %endif
 %endif
+%ifnidn %2, fnord
+%ifdef cpuname
+%if notcpuflag(%2)
+%error use of ``%1'' %2 instruction in cpuname function: current_function
+%elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8
+%error use of ``%1'' sse2 instruction in cpuname function: current_function
+%endif
+%endif
+%endif
 
 %if __emulate_avx
-%xdefine __src1 %6
-%xdefine __src2 %7
-%ifnidn %5, %6
-%if %0 >= 8
-CHECK_AVX_INSTR_EMU {%1 %5, %6, %7, %8}, %5, %7, %8
+%xdefine __src1 %7
+%xdefine __src2 %8
+%ifnidn %6, %7
+%if %0 >= 9
+CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9
 %else
-CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
+CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8
 %endif
-%if %4 && %3 == 0
-%ifnid %7
+%if %5 && %4 == 0
+%ifnid %8
 ; 3-operand AVX instructions with a memory arg can only have it in src2,
 ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
 ; So, if the instruction is commutative with a memory arg, swap them.
-%xdefine __src1 %7
-%xdefine __src2 %6
+%xdefine __src1 %8
+%xdefine __src2 %7
 %endif
 %endif
 %if __sizeofreg == 8
-MOVQ %5, __src1
-%elif %2
-MOVAPS %5, __src1
+MOVQ %6, __src1
+%elif %3
+MOVAPS %6, __src1
 %else
-MOVDQA %5, __src1
+MOVDQA %6, __src1
 %endif
 %endif
-%if %0 >= 8
-%1 %5, __src2, %8
+%if %0 >= 9
+%1 %6, __src2, %9
 %else
-%1 %5, __src2
+%1 %6, __src2
 %endif
-%elif %0 >= 8
-__instr %5, %6, %7, %8
+%elif %0 >= 9
+__instr %6, %7, %8, %9
+%elif %0 == 8
+__instr %6, %7, %8
 %elif %0 == 7
-__instr %5, %6, %7
-%elif %0 == 6
-__instr %5, %6
+__instr %6, %7
 %else
-__instr %5
+__instr %6
 %endif
 %endmacro
 
 ;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
-;%4 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
-%macro AVX_INSTR 1-4 0, 1, 0
-%macro %1 1-9 fnord, fnord, fnord, fnord, %1, %2, %3, %4
+;%2 == minimal instruction set
+;%3 == 1 if float, 0 if int
+;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
+%macro AVX_INSTR 1-5 fnord, 0, 1, 0
+%macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5
 %ifidn %2, fnord
-RUN_AVX_INSTR %6, %7, %8, %9, %1
+RUN_AVX_INSTR %6, %7, %8, %9, %10, %1
 %elifidn %3, fnord
-RUN_AVX_INSTR %6, %7, %8, %9, %1, %2
+RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2
 %elifidn %4, fnord

[libav-commits] x86inc: warn if XOP integer FMA instruction emulation is impossible

2015-08-11 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 8c75ba55a4367c854b577c849ea2195bd78c4c81

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Anton Khirnov <an...@khirnov.net>
Date:  Sat Aug  1 17:27:31 2015 +0200

x86inc: warn if XOP integer FMA instruction emulation is impossible

Emulation requires a temporary register if arguments 1 and 4 are the same; this
doesn't obey the semantics of the original instruction, so we can't emulate
that in x86inc.

Also add pmacsdql emulation.

Signed-off-by: Henrik Gramner <hen...@gramner.com>
Signed-off-by: Anton Khirnov <an...@khirnov.net>

---

 libavutil/x86/x86inc.asm |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index a6e1f33..4c0a4bd 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1410,15 +1410,18 @@ AVX_INSTR pfmul, 1, 0, 1
 %macro %1 4-7 %1, %2, %3
 %if cpuflag(xop)
 v%5 %1, %2, %3, %4
-%else
+%elifnidn %1, %4
 %6 %1, %2, %3
 %7 %1, %4
+%else
+%error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
 %endif
 %endmacro
 %endmacro
 
-FMA_INSTR  pmacsdd,  pmulld, paddd
 FMA_INSTR  pmacsww,  pmullw, paddw
+FMA_INSTR  pmacsdd,  pmulld, paddd ; sse4 emulation
+FMA_INSTR pmacsdql,  pmuldq, paddq ; sse4 emulation
 FMA_INSTR pmadcswd, pmaddwd, paddd
 
 ; tzcnt is equivalent to rep bsf and is backwards-compatible with bsf.

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits


[libav-commits] h264: fix 4:2:2 PCM-macroblocks decoding

2012-04-04 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 14af74e9a001f0544ee2362b624344154c4e8a0c

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Diego Biurrun <di...@biurrun.de>
Date:  Mon Apr  2 09:25:19 2012 -0700

h264: fix 4:2:2 PCM-macroblocks decoding

Fixes bug 239.

Signed-off-by: Ronald S. Bultje <rsbul...@gmail.com>

---

 libavcodec/h264.c |   15 ++-
 1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index fe11686..6a1a8b6 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2138,10 +2138,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
 
 if (!simple && IS_INTRA_PCM(mb_type)) {
 if (pixel_shift) {
+static const uint16_t mb_sizes[4] = { 256, 384, 512, 768 };
 const int bit_depth = h->sps.bit_depth_luma;
 int j;
 GetBitContext gb;
-init_get_bits(&gb, (uint8_t *)h->mb, 384 * bit_depth);
+init_get_bits(&gb, (uint8_t *)h->mb,
+  mb_sizes[h->sps.chroma_format_idc] * bit_depth);
 
 for (i = 0; i < 16; i++) {
 uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
@@ -2175,7 +2177,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
 }
 } else {
 for (i = 0; i < 16; i++)
-memcpy(dest_y + i * linesize, h->mb + i * 8, 16);
+memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
 if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
 if (!h->sps.chroma_format_idc) {
 for (i = 0; i < block_h; i++) {
@@ -2183,9 +2185,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
 memset(dest_cr + i * uvlinesize, 128, 8);
 }
 } else {
+uint8_t *src_cb = (uint8_t *)h->mb + 256;
+uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
 for (i = 0; i < block_h; i++) {
-memcpy(dest_cb + i * uvlinesize, h->mb + 128 + i * 4, 8);
-memcpy(dest_cr + i * uvlinesize, h->mb + 160 + i * 4, 8);
+memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
+memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
 }
 }
 }
@@ -2374,7 +2378,8 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h,
 } else {
 for (p = 0; p < plane_count; p++)
 for (i = 0; i < 16; i++)
-memcpy(dest[p] + i * linesize, h->mb + p * 128 + i * 8, 16);
+memcpy(dest[p] + i * linesize,
+   (uint8_t *)h->mb + p * 256 + i * 16, 16);
 }
 } else {
 if (IS_INTRA(mb_type)) {

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits


[libav-commits] Fix decoding of lossless 10-bit 4:4:4 H.264

2011-10-29 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: fdb5314ea7fe0ab1036b8de94d47861d99e4daeb

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Ronald S. Bultje <rsbul...@gmail.com>
Date:  Fri Oct 28 19:13:13 2011 +0400

Fix decoding of lossless 10-bit 4:4:4 H.264

Signed-off-by: Ronald S. Bultje <rsbul...@gmail.com>

---

 libavcodec/h264.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 880b5b7..fc20eb4 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1779,7 +1779,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_ty
 static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
 for(i = 0; i < 16; i++)
-dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
+dctcoef_set(h->mb+(p*256 << pixel_shift), pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
 }
 }
 }else

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits


[libav-commits] Fix decoding of lossless 4:2:2 H.264

2011-10-29 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 640d5f1c801061844394813c78ea449e5826f6e5

Author:Anton Mitrofanov <bugmas...@narod.ru>
Committer: Ronald S. Bultje <rsbul...@gmail.com>
Date:  Fri Oct 28 23:33:23 2011 +0400

Fix decoding of lossless 4:2:2 H.264

Signed-off-by: Ronald S. Bultje <rsbul...@gmail.com>

---

 libavcodec/h264.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index fc20eb4..f7c52cd 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2002,7 +2002,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
 }
 if (chroma422) {
 for(i=j*16+4; i<j*16+8; i++){
-if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
+if(h->non_zero_count_cache[ scan8[i+4] ] || dctcoef_get(h->mb, pixel_shift, i*16))
 idct_add   (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize);
 }
 }

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits