[libav-commits] decode: add a per-frame private data for hwaccel use

2017-07-26 Thread Anton Khirnov
Module: libav
Branch: master
Commit: 704311b2946d74a80f65906961cd9baaa18683a3

Author:Anton Khirnov 
Committer: Anton Khirnov 
Date:  Sat Jul  1 12:09:58 2017 +0200

decode: add a per-frame private data for hwaccel use

This will be useful in the CUVID hwaccel. It should also eventually
replace current decoder-specific mechanisms used by various other
hwaccels.

---

 libavcodec/decode.c | 3 +++
 libavcodec/decode.h | 6 ++
 2 files changed, 9 insertions(+)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 9050b57..c76ee66 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1025,6 +1025,9 @@ static void decode_data_free(void *opaque, uint8_t *data)
 if (fdd->post_process_opaque_free)
 fdd->post_process_opaque_free(fdd->post_process_opaque);
 
+if (fdd->hwaccel_priv_free)
+fdd->hwaccel_priv_free(fdd->hwaccel_priv);
+
 av_freep(&fdd);
 }
 
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 72052f1..235f355 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -49,6 +49,12 @@ typedef struct FrameDecodeData {
 int (*post_process)(void *logctx, AVFrame *frame);
 void *post_process_opaque;
 void (*post_process_opaque_free)(void *opaque);
+
+/**
+ * Per-frame private data for hwaccels.
+ */
+void *hwaccel_priv;
+void (*hwaccel_priv_free)(void *priv);
 } FrameDecodeData;
 
 /**

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] imgutils: add function to clear an image to black

2017-07-26 Thread wm4
Module: libav
Branch: master
Commit: 45df7adc1d9b7e8fbae5af9328baa6ab3562002b

Author:wm4 
Committer: Anton Khirnov 
Date:  Sat Jul 22 23:05:14 2017 +0200

imgutils: add function to clear an image to black

Black isn't always just memset(ptr, 0, size). Limited YUV in particular
requires relatively non-obvious values, and filling a frame with
repeating 0 bytes is disallowed in some contexts. With component sizes
larger than 8 or packed YUV, this can become relatively complicated. So
having a generic function for this seems helpful.

In order to handle the complex cases in a generic way without destroying
performance, this code attempts to compute a black pixel, and then uses
that value to clear the image data quickly by using a function like
memset.

Common cases like yuv410p10 or rgba can't be handled with a simple
memset, so there is some code to fill memory with 2/4/8 byte patterns.
For the remaining cases, a generic slow fallback is used.

Signed-off-by: Anton Khirnov 

---

 doc/APIchanges   |   3 +
 libavutil/imgutils.c | 167 +++
 libavutil/imgutils.h |  27 +
 libavutil/version.h  |   2 +-
 4 files changed, 198 insertions(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 30a8f80..463247f 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil: 2017-03-23
 
 API changes, most recent first:
 
+2017-xx-xx - xxx - lavu 56.4.0 - imgutils.h
+  Add av_image_fill_black().
+
 2017-xx-xx - xxx - lavu 56.3.0 - frame.h
   Add av_frame_apply_cropping().
 
diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 84abb11..6629622 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -435,3 +435,170 @@ int av_image_copy_to_buffer(uint8_t *dst, int dst_size,
 
 return size;
 }
+
+// Fill dst[0..dst_size] with the bytes in clear[0..clear_size]. The clear
+// bytes are repeated until dst_size is reached. If dst_size is unaligned (i.e.
+// dst_size%clear_size!=0), the remaining data will be filled with the beginning
+// of the clear data only.
+static void memset_bytes(uint8_t *dst, size_t dst_size, uint8_t *clear,
+ size_t clear_size)
+{
+size_t pos = 0;
+int same = 1;
+int i;
+
+if (!clear_size)
+return;
+
+// Reduce to memset() if possible.
+for (i = 0; i < clear_size; i++) {
+if (clear[i] != clear[0]) {
+same = 0;
+break;
+}
+}
+if (same)
+clear_size = 1;
+
+if (clear_size == 1) {
+memset(dst, clear[0], dst_size);
+dst_size = 0;
+} else if (clear_size == 2) {
+uint16_t val = AV_RN16(clear);
+for (; dst_size >= 2; dst_size -= 2) {
+AV_WN16(dst, val);
+dst += 2;
+}
+} else if (clear_size == 4) {
+uint32_t val = AV_RN32(clear);
+for (; dst_size >= 4; dst_size -= 4) {
+AV_WN32(dst, val);
+dst += 4;
+}
+} else if (clear_size == 8) {
+uint32_t val = AV_RN64(clear);
+for (; dst_size >= 8; dst_size -= 8) {
+AV_WN64(dst, val);
+dst += 8;
+}
+}
+
+for (; dst_size; dst_size--)
+*dst++ = clear[pos++ % clear_size];
+}
+
+// Maximum size in bytes of a plane element (usually a pixel, or multiple pixels
+// if it's a subsampled packed format).
+#define MAX_BLOCK_SIZE 32
+
+int av_image_fill_black(uint8_t *dst_data[4], const ptrdiff_t dst_linesize[4],
+enum AVPixelFormat pix_fmt, enum AVColorRange range,
+int width, int height)
+{
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+int nb_planes = av_pix_fmt_count_planes(pix_fmt);
+// A pixel or a group of pixels on each plane, with a value that represents black.
+// Consider e.g. AV_PIX_FMT_UYVY422 for non-trivial cases.
+uint8_t clear_block[4][MAX_BLOCK_SIZE] = {0}; // clear padding with 0
+int clear_block_size[4] = {0};
+ptrdiff_t plane_line_bytes[4] = {0};
+int rgb, limited;
+int plane, c;
+
+if (!desc || nb_planes < 1 || nb_planes > 4 || desc->flags & AV_PIX_FMT_FLAG_HWACCEL)
+return AVERROR(EINVAL);
+
+rgb = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
+limited = !rgb && range != AVCOL_RANGE_JPEG;
+
+if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) {
+ptrdiff_t bytewidth = av_image_get_linesize(pix_fmt, width, 0);
+uint8_t *data;
+int mono = pix_fmt == AV_PIX_FMT_MONOWHITE || pix_fmt == AV_PIX_FMT_MONOBLACK;
+int fill = pix_fmt == AV_PIX_FMT_MONOWHITE ? 0xFF : 0;
+if (nb_planes != 1 || !(rgb || mono) || bytewidth < 1)
+return AVERROR(EINVAL);
+
+if (!dst_data)
+return 0;
+
+data = dst_data[0];
+
+// (Bitstream + alpha will be handled incorrectly - it'll remain transparent.)
+ 

[libav-commits] decode: add a mechanism for performing delayed processing on the decoded frames

2017-07-26 Thread Anton Khirnov
Module: libav
Branch: master
Commit: badf0951f54c1332e77455dc40398f3512540c1b

Author:Anton Khirnov 
Committer: Anton Khirnov 
Date:  Sat Jul  1 12:09:58 2017 +0200

decode: add a mechanism for performing delayed processing on the decoded frames

This will be useful in the CUVID hwaccel.

---

 libavcodec/decode.c | 11 +++
 libavcodec/decode.h | 15 +++
 2 files changed, 26 insertions(+)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index bcc119c..9050b57 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -419,6 +419,14 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 
 fdd = (FrameDecodeData*)frame->opaque_ref->data;
 
+if (fdd->post_process) {
+ret = fdd->post_process(avctx, frame);
+if (ret < 0) {
+av_frame_unref(frame);
+return ret;
+}
+}
+
 user_opaque_ref = fdd->user_opaque_ref;
 fdd->user_opaque_ref = NULL;
 av_buffer_unref(&frame->opaque_ref);
@@ -1014,6 +1022,9 @@ static void decode_data_free(void *opaque, uint8_t *data)
 
 av_buffer_unref(&fdd->user_opaque_ref);
 
+if (fdd->post_process_opaque_free)
+fdd->post_process_opaque_free(fdd->post_process_opaque);
+
 av_freep(&fdd);
 }
 
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 61b53b2..72052f1 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -22,6 +22,7 @@
 #define AVCODEC_DECODE_H
 
 #include "libavutil/buffer.h"
+#include "libavutil/frame.h"
 
 #include "avcodec.h"
 
@@ -34,6 +35,20 @@ typedef struct FrameDecodeData {
  * The original user-set opaque_ref.
  */
 AVBufferRef *user_opaque_ref;
+
+/**
+ * The callback to perform some delayed processing on the frame right
+ * before it is returned to the caller.
+ *
+ * @note This code is called at some unspecified point after the frame is
+ * returned from the decoder's decode/receive_frame call. Therefore it cannot rely
+ * on AVCodecContext being in any specific state, so it does not get to
+ * access AVCodecContext directly at all. All the state it needs must be
+ * stored in the post_process_opaque object.
+ */
+int (*post_process)(void *logctx, AVFrame *frame);
+void *post_process_opaque;
+void (*post_process_opaque_free)(void *opaque);
 } FrameDecodeData;
 
 /**

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264dec: track the last seen value of x264_build

2017-07-26 Thread Anton Khirnov
Module: libav
Branch: master
Commit: 45c4bf3df03ef53ae61fa1473424d4ae024f22e4

Author:Anton Khirnov 
Committer: Anton Khirnov 
Date:  Sat Jul 22 14:52:27 2017 +0200

h264dec: track the last seen value of x264_build

Do not use the one in the SEI directly as that is reset at certain
points.

Inspired by patches from Michael Niedermayer and
Anton Mitrofanov.

CC: libav-sta...@libav.org

---

 libavcodec/h264_direct.c | 4 ++--
 libavcodec/h264_slice.c  | 6 +-
 libavcodec/h264dec.c | 1 +
 libavcodec/h264dec.h | 1 +
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index 7ec49b6..abac259 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -391,7 +391,7 @@ single_col:
  (l1ref0[0] < 0 && !l1ref1[0] &&
   FFABS(l1mv1[0][0]) <= 1 &&
   FFABS(l1mv1[0][1]) <= 1 &&
-  h->sei.unregistered.x264_build > 33U))) {
+  h->x264_build > 33U))) {
 a = b = 0;
 if (ref[0] > 0)
 a = mv[0];
@@ -426,7 +426,7 @@ single_col:
 (l1ref0[i8] == 0 ||
  (l1ref0[i8] < 0 &&
   l1ref1[i8] == 0 &&
-  h->sei.unregistered.x264_build > 33U))) {
+  h->x264_build > 33U))) {
 const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
 if (IS_SUB_8X8(sub_mb_type)) {
 const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index c9f1dbb..e7408b2 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -403,6 +403,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
 
 h->enable_er   = h1->enable_er;
 h->workaround_bugs = h1->workaround_bugs;
+h->x264_build  = h1->x264_build;
 h->droppable   = h1->droppable;
 
 // extradata/NAL handling
@@ -509,6 +510,9 @@ static int h264_frame_start(H264Context *h)
 
 h->mb_aff_frame = h->ps.sps->mb_aff && (h->picture_structure == PICT_FRAME);
 
+if (h->sei.unregistered.x264_build >= 0)
+h->x264_build = h->sei.unregistered.x264_build;
+
 assert(h->cur_pic_ptr->long_ref == 0);
 
 return 0;
@@ -847,7 +851,7 @@ static int h264_slice_header_init(H264Context *h)
 
 if (sps->timing_info_present_flag) {
 int64_t den = sps->time_scale;
-if (h->sei.unregistered.x264_build < 44U)
+if (h->x264_build < 44U)
 den *= 2;
 av_reduce(&h->avctx->framerate.den, &h->avctx->framerate.num,
   sps->num_units_in_tick, den, 1 << 30);
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 2a532a7..7a8293e 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -293,6 +293,7 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h)
 h->flags = avctx->flags;
 h->poc.prev_poc_msb  = 1 << 16;
 h->recovery_frame= -1;
+h->x264_build= -1;
 h->frame_recovered   = 0;
 
 h->next_outputed_poc = INT_MIN;
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index fc7beeb..ddfe224 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -361,6 +361,7 @@ typedef struct H264Context {
 int context_initialized;
 int flags;
 int workaround_bugs;
+int x264_build;
 /* Set when slice threading is used and at least one slice uses deblocking
  * mode 1 (i.e. across slice boundaries). Then we disable the loop filter
  * during normal MB decoding and execute it serially at the end.

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264_cabac: Fix CABAC+8x8dct in 4:4:4

2017-07-26 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 18d3f36d3c4d0f2c3e702f970ff8b457d7d5e39c

Author:Anton Mitrofanov 
Committer: Anton Khirnov 
Date:  Thu Jun 15 18:56:14 2017 -0400

h264_cabac: Fix CABAC+8x8dct in 4:4:4

Use the correct ctxIdxInc calculation for coded_block_flag.
Keep old behavior for old versions of x264 for backward compatibility.

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 

---

 libavcodec/h264_cabac.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index b28e486..5dd285c 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2329,21 +2329,40 @@ decode_intra_mb:
 if (CHROMA444(h) && IS_8x8DCT(mb_type)){
 int i;
 uint8_t *nnz_cache = sl->non_zero_count_cache;
-for (i = 0; i < 2; i++){
-if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
-nnz_cache[3+8* 1 + 2*8*i]=
-nnz_cache[3+8* 2 + 2*8*i]=
-nnz_cache[3+8* 6 + 2*8*i]=
-nnz_cache[3+8* 7 + 2*8*i]=
-nnz_cache[3+8*11 + 2*8*i]=
-nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+if (h->x264_build < 151U) {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
+}
+} else {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= !IS_INTRA_PCM(sl->left_type[LEFT(i)]) ? 0 : 64;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA_PCM(sl->top_type) ? 0 : 0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
-}
-if (sl->top_type && !IS_8x8DCT(sl->top_type)){
-uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
-AV_WN32A(&nnz_cache[4+8* 0], top_empty);
-AV_WN32A(&nnz_cache[4+8* 5], top_empty);
-AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
 }
 h->cur_pic.mb_type[mb_xy] = mb_type;

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction

2017-07-26 Thread Yogender Kumar Gupta
Module: libav
Branch: master
Commit: 79c6477c2abd8cfa41eef0c4ac39779dd8a9ec8e

Author:Yogender Kumar Gupta 
Committer: Anton Khirnov 
Date:  Thu Jun 15 18:56:13 2017 -0400

h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 

---

 libavcodec/h264_mb.c   |  7 +++-
 libavcodec/h264pred.c  |  2 ++
 libavcodec/h264pred.h  |  3 ++
 libavcodec/h264pred_template.c | 73 ++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index f037bd5..51d73ce 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -636,7 +636,12 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
 uint8_t *const ptr = dest_y + block_offset[i];
 const int dir  = sl->intra4x4_pred_mode_cache[scan8[i]];
 if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) {
-h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
+if (h->x264_build < 151U) {
+h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
+} else
+h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift),
+(sl-> topleft_samples_available << i) & 0x8000,
+(sl->topright_samples_available << i) & 0x4000, linesize);
 } else {
 const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]];
 h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000,
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 7627eb0..135babc 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
 h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add  , depth);\
 h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add   , depth);\
 h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add , depth);\
+h->pred8x8l_filter_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_filter_add   , depth);\
+h->pred8x8l_filter_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_filter_add , depth);\
 if (chroma_format_idc <= 1) {\
 h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add, depth);\
 h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add  , depth);\
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 60e7434..795d8f3 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -101,6 +101,9 @@ typedef struct H264PredContext {
   int16_t *block /*align 16*/, ptrdiff_t stride);
 void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
int16_t *block /*align 16*/, ptrdiff_t stride);
+void(*pred8x8l_filter_add[2])(uint8_t *pix /*align  8*/,
+  int16_t *block /*align 16*/,
+  int topleft, int topright, ptrdiff_t stride);
 void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
   const int *block_offset,
   int16_t *block /*align 16*/, ptrdiff_t stride);
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 8492b2b..02494aa 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
 }
+
+static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
+int has_topright, ptrdiff_t _stride)
+{
+int i;
+pixel *src = (pixel*)_src;
+const dctcoef *block = (const dctcoef*)_block;
+pixel pix[8];
+int stride = _stride/sizeof(pixel);
+PREDICT_8x8_LOAD_TOP;
+
+pix[0] = t0;
+pix[1] = t1;
+pix[2] = t2;
+pix[3] = t3;
+pix[4] = t4;
+pix[5] = t5;
+pix[6] = t6;
+pix[7] = t7;
+
+for (i = 0; i < 8; i++) {
+pixel v = pix[i];
+src[0 * stride] = v += block[0];
+src[1 * stride] = v += block[8];
+src[2 * stride] = v += block[16];
+src[3 * stride] = v += block[24];
+src[4 * stride] = v += block[32];
+src[5 * stride] = v += block[40];
+src[6 * stride] = v += block[48];
+src[7 * stride] = v +  block[56];
+src++;
+block++;
+}
+
+

[libav-commits] h264dec: Fix mix of lossless and lossy MBs decoding

2017-07-26 Thread Anton Mitrofanov
Module: libav
Branch: master
Commit: 70946e605924e2108c39f96faa369c220177f301

Author:Anton Mitrofanov 
Committer: Anton Khirnov 
Date:  Thu Jun 15 18:56:16 2017 -0400

h264dec: Fix mix of lossless and lossy MBs decoding

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 

---

 libavcodec/h264_cabac.c | 16 
 libavcodec/h264_cavlc.c | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 5dd285c..c0b9e30 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2371,14 +2371,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const uint32_t *qmul;
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 // decode_cabac_mb_dqp
 if(get_cabac_noinline( &sl->cabac, &sl->cabac_state[60 + (sl->last_qscale_diff != 0)])){
 int val = 1;
@@ -2409,6 +2401,14 @@ decode_intra_mb:
 }else
 sl->last_qscale_diff=0;
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 0);
 if (CHROMA444(h)) {
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index c11e211..d57062b 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -1093,14 +1093,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 dquant= get_se_golomb(&sl->gb);
 
 sl->qscale += dquant;
@@ -1117,6 +1109,14 @@ decode_intra_mb:
 sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
 sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
 return -1;
 }

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] h264dec: add a CUVID hwaccel

2017-07-26 Thread Anton Khirnov
Module: libav
Branch: master
Commit: b9129ec4668c511e0a79e25c6f25d748cee172c9

Author:Anton Khirnov 
Committer: Anton Khirnov 
Date:  Sat Feb 11 16:49:34 2017 +0100

h264dec: add a CUVID hwaccel

Some parts of the code are based on a patch by
Timo Rothenpieler 

---

 Changelog   |   1 +
 avtools/avconv.h|   1 +
 avtools/avconv_opt.c|   4 +
 configure   |  11 +-
 libavcodec/Makefile |   2 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/cuvid.c  | 425 
 libavcodec/cuvid.h  |  61 +++
 libavcodec/cuvid_h264.c | 177 
 libavcodec/h264_slice.c |   6 +-
 10 files changed, 687 insertions(+), 2 deletions(-)

diff --git a/Changelog b/Changelog
index 82e3e3a..7115066 100644
--- a/Changelog
+++ b/Changelog
@@ -18,6 +18,7 @@ version :
 - support for decoding through D3D11VA in avconv
 - Cinepak encoder
 - Intel QSV-accelerated MJPEG encoding
+- NVIDIA CUVID-accelerated H.264 decoding
 
 
 version 12:
diff --git a/avtools/avconv.h b/avtools/avconv.h
index 4c69933..b5843fb 100644
--- a/avtools/avconv.h
+++ b/avtools/avconv.h
@@ -58,6 +58,7 @@ enum HWAccelID {
 HWACCEL_QSV,
 HWACCEL_VAAPI,
 HWACCEL_D3D11VA,
+HWACCEL_CUVID,
 };
 
 typedef struct HWAccel {
diff --git a/avtools/avconv_opt.c b/avtools/avconv_opt.c
index 575ce12..df69336 100644
--- a/avtools/avconv_opt.c
+++ b/avtools/avconv_opt.c
@@ -80,6 +80,10 @@ const HWAccel hwaccels[] = {
 { "vaapi", hwaccel_decode_init, HWACCEL_VAAPI, AV_PIX_FMT_VAAPI,
   AV_HWDEVICE_TYPE_VAAPI },
 #endif
+#if CONFIG_CUVID
+{ "cuvid", hwaccel_decode_init, HWACCEL_CUVID, AV_PIX_FMT_CUDA,
+   AV_HWDEVICE_TYPE_CUDA },
+#endif
 { 0 },
 };
 int hwaccel_lax_profile_check = 0;
diff --git a/configure b/configure
index 35ae031..7a7fbb2 100755
--- a/configure
+++ b/configure
@@ -237,6 +237,7 @@ External library support:
 
   The following libraries provide various hardware acceleration features:
   --enable-cuda    Nvidia CUDA (dynamically linked)
+  --enable-cuvid   Nvidia CUVID video decode acceleration
   --enable-d3d11va Microsoft Direct3D 11 video acceleration [auto]
   --enable-dxva2   Microsoft DirectX 9 video acceleration [auto]
   --enable-libmfx  Intel MediaSDK (AKA Quick Sync Video)
@@ -1266,6 +1267,7 @@ EXTRALIBS_LIST="
 
 HWACCEL_LIBRARY_NONFREE_LIST="
 cuda
+cuvid
 libnpp
 "
 HWACCEL_LIBRARY_LIST="
@@ -1686,6 +1688,7 @@ TOOLCHAIN_FEATURES="
 
 TYPES_LIST="
 CONDITION_VARIABLE_Ptr
+CUVIDDECODECREATEINFO_bitDepthMinus8
 socklen_t
 struct_addrinfo
 struct_group_source_req
@@ -2189,6 +2192,8 @@ vda_extralibs="-framework CoreFoundation -framework VideoDecodeAcceleration -fra
 
 h263_vaapi_hwaccel_deps="vaapi"
 h263_vaapi_hwaccel_select="h263_decoder"
+h264_cuvid_hwaccel_deps="cuvid CUVIDH264PICPARAMS"
+h264_cuvid_hwaccel_select="h264_decoder"
 h264_d3d11va_hwaccel_deps="d3d11va"
 h264_d3d11va_hwaccel_select="h264_decoder"
 h264_d3d11va2_hwaccel_deps="d3d11va"
@@ -2556,7 +2561,7 @@ avdevice_extralibs="libm_extralibs"
 avformat_extralibs="libm_extralibs"
 avfilter_extralibs="pthreads_extralibs libm_extralibs"
 avresample_extralibs="libm_extralibs"
-avutil_extralibs="clock_gettime_extralibs cuda_extralibs libm_extralibs libmfx_extralibs nanosleep_extralibs pthreads_extralibs user32_extralibs vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs wincrypt_extralibs"
+avutil_extralibs="clock_gettime_extralibs cuda_extralibs cuvid_extralibs libm_extralibs libmfx_extralibs nanosleep_extralibs pthreads_extralibs user32_extralibs vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs wincrypt_extralibs"
 swscale_extralibs="libm_extralibs"
 
 # programs
@@ -4694,6 +4699,9 @@ check_lib psapi "windows.h psapi.h" GetProcessMemoryInfo -lpsapi
 
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 
+check_type "cuviddec.h" "CUVIDH264PICPARAMS"
+check_struct "cuviddec.h" "CUVIDDECODECREATEINFO" bitDepthMinus8
+
 check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
 check_type "windows.h d3d11.h" "ID3D11VideoDecoder"
 check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
@@ -4753,6 +4761,7 @@ done
 enabled avisynth  && require_header avisynth/avisynth_c.h
 enabled avxsynth  && require_header avxsynth/avxsynth_c.h
 enabled cuda  && require cuda cuda.h cuInit -lcuda
+enabled cuvid && require cuvid cuviddec.h cuvidCreateDecoder -lnvcuvid
 enabled frei0r&& require_header frei0r.h
 enabled gnutls&& require_pkg_config gnutls gnutls gnutls/gnutls.h gnutls_global_init
 enabled libbs2b   && require_pkg_config libbs2b libbs2b bs2b.h bs2b_open
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 12bf8fe..ac13166 100644

[libav-commits] lavc, lavu: move frame cropping to a convenience function

2017-07-26 Thread wm4
Module: libav
Branch: master
Commit: 47399ccdfd93d337c96c76fbf591f0e3637131ef

Author:wm4 
Committer: Anton Khirnov 
Date:  Sat Jul 22 23:05:13 2017 +0200

lavc, lavu: move frame cropping to a convenience function

Signed-off-by: Anton Khirnov 

---

 doc/APIchanges  |   3 ++
 libavcodec/decode.c |  89 ++
 libavutil/frame.c   | 100 
 libavutil/frame.h   |  34 ++
 libavutil/version.h |   2 +-
 5 files changed, 140 insertions(+), 88 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 0f7c839..30a8f80 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil: 2017-03-23
 
 API changes, most recent first:
 
+2017-xx-xx - xxx - lavu 56.3.0 - frame.h
+  Add av_frame_apply_cropping().
+
 2017-xx-xx - xxx - lavc 58.4.0 - avcodec.h
   DXVA2 and D3D11 hardware accelerated decoding now supports the new hwaccel API,
   which can create the decoder context and allocate hardware frame automatically.
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 175a6fa..9644e89 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -446,44 +446,8 @@ int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke
 return 0;
 }
 
-static int calc_cropping_offsets(size_t offsets[4], const AVFrame *frame,
- const AVPixFmtDescriptor *desc)
-{
-int i, j;
-
-for (i = 0; frame->data[i]; i++) {
-const AVComponentDescriptor *comp = NULL;
-int shift_x = (i == 1 || i == 2) ? desc->log2_chroma_w : 0;
-int shift_y = (i == 1 || i == 2) ? desc->log2_chroma_h : 0;
-
-if (desc->flags & (AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_PSEUDOPAL) && i == 1) {
-offsets[i] = 0;
-break;
-}
-
-/* find any component descriptor for this plane */
-for (j = 0; j < desc->nb_components; j++) {
-if (desc->comp[j].plane == i) {
-comp = &desc->comp[j];
-break;
-}
-}
-if (!comp)
-return AVERROR_BUG;
-
-offsets[i] = (frame->crop_top  >> shift_y) * frame->linesize[i] +
- (frame->crop_left >> shift_x) * comp->step;
-}
-
-return 0;
-}
-
 static int apply_cropping(AVCodecContext *avctx, AVFrame *frame)
 {
-const AVPixFmtDescriptor *desc;
-size_t offsets[4];
-int i;
-
 /* make sure we are noisy about decoders returning invalid cropping data */
 if (frame->crop_left >= INT_MAX - frame->crop_right||
 frame->crop_top  >= INT_MAX - frame->crop_bottom   ||
@@ -504,57 +468,8 @@ static int apply_cropping(AVCodecContext *avctx, AVFrame *frame)
 if (!avctx->apply_cropping)
 return 0;
 
-desc = av_pix_fmt_desc_get(frame->format);
-if (!desc)
-return AVERROR_BUG;
-
-/* Apply just the right/bottom cropping for hwaccel formats. Bitstream
- * formats cannot be easily handled here either (and corresponding decoders
- * should not export any cropping anyway), so do the same for those as well.
- * */
-if (desc->flags & (AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_HWACCEL)) {
-frame->width  -= frame->crop_right;
-frame->height -= frame->crop_bottom;
-frame->crop_right  = 0;
-frame->crop_bottom = 0;
-return 0;
-}
-
-/* calculate the offsets for each plane */
-calc_cropping_offsets(offsets, frame, desc);
-
-/* adjust the offsets to avoid breaking alignment */
-if (!(avctx->flags & AV_CODEC_FLAG_UNALIGNED)) {
-int log2_crop_align = frame->crop_left ? av_ctz(frame->crop_left) : INT_MAX;
-int min_log2_align = INT_MAX;
-
-for (i = 0; frame->data[i]; i++) {
-int log2_align = offsets[i] ? av_ctz(offsets[i]) : INT_MAX;
-min_log2_align = FFMIN(log2_align, min_log2_align);
-}
-
-/* we assume, and it should always be true, that the data alignment is
- * related to the cropping alignment by a constant power-of-2 factor */
-if (log2_crop_align < min_log2_align)
-return AVERROR_BUG;
-
-if (min_log2_align < 5) {
-frame->crop_left &= ~((1 << (5 + log2_crop_align - min_log2_align)) - 1);
-calc_cropping_offsets(offsets, frame, desc);
-}
-}
-
-for (i = 0; frame->data[i]; i++)
-frame->data[i] += offsets[i];
-
-frame->width  -= (frame->crop_left + frame->crop_right);
-frame->height -= (frame->crop_top  + frame->crop_bottom);
-frame->crop_left   = 0;
-frame->crop_right  = 0;
-frame->crop_top= 0;
-frame->crop_bottom = 0;
-
-return 0;
+return av_frame_apply_cropping(frame, avctx->flags & 
AV_CODEC_FLAG_UNALIGNED ?
+  AV_FRAME_CROP_UNALIGNED : 

[libav-commits] decode: avoid leaks on failure in ff_get_buffer()

2017-07-26 Thread Anton Khirnov
Module: libav
Branch: master
Commit: de77671438c24ffea93398c8dc885d4dd04477de

Author:Anton Khirnov 
Committer: Anton Khirnov 
Date:  Sat Jul  1 11:32:56 2017 +0200

decode: avoid leaks on failure in ff_get_buffer()

If the get_buffer() call fails, the frame might have some side data
already set. Make sure it gets freed.

CC: libav-sta...@libav.org

---

 libavcodec/decode.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 9644e89..f7cd7f6 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1069,6 +1069,9 @@ end:
 frame->height = avctx->height;
 }
 
+if (ret < 0)
+av_frame_unref(frame);
+
 return ret;
 }
 

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits

[libav-commits] decode: add a method for attaching lavc-internal data to frames

2017-07-26 Thread Anton Khirnov
Module: libav
Branch: master
Commit: 359a8a3e2d1194b52b6c386f94fd0929567dfb67

Author:Anton Khirnov 
Committer: Anton Khirnov 
Date:  Sat Jul  1 11:12:44 2017 +0200

decode: add a method for attaching lavc-internal data to frames

Use the AVFrame.opaque_ref field. The original user's opaque_ref is
wrapped in the lavc struct and then unwrapped before the frame is
returned to the caller.

This new struct will be useful in the following commits.

---

 libavcodec/decode.c | 57 +
 libavcodec/decode.h | 13 
 2 files changed, 70 insertions(+)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index f7cd7f6..bcc119c 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -406,6 +406,26 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 if (ret == AVERROR_EOF)
 avci->draining_done = 1;
 
+/* unwrap the per-frame decode data and restore the original opaque_ref*/
+if (!ret) {
+/* the only case where decode data is not set should be decoders
+ * that do not call ff_get_buffer() */
+av_assert0((frame->opaque_ref && frame->opaque_ref->size == sizeof(FrameDecodeData)) ||
+   !(avctx->codec->capabilities & AV_CODEC_CAP_DR1));
+
+if (frame->opaque_ref) {
+FrameDecodeData *fdd;
+AVBufferRef *user_opaque_ref;
+
+fdd = (FrameDecodeData*)frame->opaque_ref->data;
+
+user_opaque_ref = fdd->user_opaque_ref;
+fdd->user_opaque_ref = NULL;
+av_buffer_unref(&frame->opaque_ref);
+frame->opaque_ref = user_opaque_ref;
+}
+}
+
 return ret;
 }
 
@@ -988,6 +1008,37 @@ FF_ENABLE_DEPRECATION_WARNINGS
 return 0;
 }
 
+static void decode_data_free(void *opaque, uint8_t *data)
+{
+FrameDecodeData *fdd = (FrameDecodeData*)data;
+
+av_buffer_unref(&fdd->user_opaque_ref);
+
+av_freep(&fdd);
+}
+
+static int attach_decode_data(AVFrame *frame)
+{
+AVBufferRef *fdd_buf;
+FrameDecodeData *fdd;
+
+fdd = av_mallocz(sizeof(*fdd));
+if (!fdd)
+return AVERROR(ENOMEM);
+
+fdd_buf = av_buffer_create((uint8_t*)fdd, sizeof(*fdd), decode_data_free,
+   NULL, AV_BUFFER_FLAG_READONLY);
+if (!fdd_buf) {
+av_freep(&fdd);
+return AVERROR(ENOMEM);
+}
+
+fdd->user_opaque_ref = frame->opaque_ref;
+frame->opaque_ref= fdd_buf;
+
+return 0;
+}
+
 int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
 {
 const AVHWAccel *hwaccel = avctx->hwaccel;
@@ -1061,6 +1112,12 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
 avctx->sw_pix_fmt = avctx->pix_fmt;
 
 ret = avctx->get_buffer2(avctx, frame, flags);
+if (ret < 0)
+goto end;
+
+ret = attach_decode_data(frame);
+if (ret < 0)
+goto end;
 
 end:
 if (avctx->codec_type == AVMEDIA_TYPE_VIDEO && !override_dimensions &&
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 2f29cf6..61b53b2 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -21,9 +21,22 @@
 #ifndef AVCODEC_DECODE_H
 #define AVCODEC_DECODE_H
 
+#include "libavutil/buffer.h"
+
 #include "avcodec.h"
 
 /**
+ * This struct stores per-frame lavc-internal data and is attached to it via
+ * opaque_ref.
+ */
+typedef struct FrameDecodeData {
+/**
+ * The original user-set opaque_ref.
+ */
+AVBufferRef *user_opaque_ref;
+} FrameDecodeData;
+
+/**
  * Called by decoders to get the next packet for decoding.
  *
  * @param pkt An empty packet to be filled with data.

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits