[libav-devel] [PATCH 4/4] h264dec: Fix mix of lossless and lossy MBs decoding

2017-07-24 Thread Anton Khirnov
From: Anton Mitrofanov 

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 
---
 libavcodec/h264_cabac.c | 16 
 libavcodec/h264_cavlc.c | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 5dd285c..c0b9e30 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2371,14 +2371,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const uint32_t *qmul;
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 // decode_cabac_mb_dqp
 if(get_cabac_noinline( &sl->cabac, &sl->cabac_state[60 + 
(sl->last_qscale_diff != 0)])){
 int val = 1;
@@ -2409,6 +2401,14 @@ decode_intra_mb:
 }else
 sl->last_qscale_diff=0;
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, 
cbp, 0);
 if (CHROMA444(h)) {
 decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, 
mb_type, cbp, 1);
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index c11e211..d57062b 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -1093,14 +1093,6 @@ decode_intra_mb:
 const uint8_t *scan, *scan8x8;
 const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
 
-if(IS_INTERLACED(mb_type)){
-scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : 
h->field_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->field_scan : h->field_scan_q0;
-}else{
-scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : 
h->zigzag_scan8x8_cavlc_q0;
-scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
-}
-
 dquant= get_se_golomb(&sl->gb);
 
 sl->qscale += dquant;
@@ -1117,6 +1109,14 @@ decode_intra_mb:
 sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
 sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
 
+if(IS_INTERLACED(mb_type)){
+scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : 
h->field_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->field_scan : h->field_scan_q0;
+}else{
+scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : 
h->zigzag_scan8x8_cavlc_q0;
+scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+}
+
 if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, 
mb_type, cbp, 0)) < 0 ) {
 return -1;
 }
-- 
2.0.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 3/4] h264_cabac: Fix CABAC+8x8dct in 4:4:4

2017-07-24 Thread Anton Khirnov
From: Anton Mitrofanov 

Use the correct ctxIdxInc calculation for coded_block_flag.
Keep old behavior for old versions of x264 for backward compatibility.

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 
---
 libavcodec/h264_cabac.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index b28e486..5dd285c 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2329,21 +2329,40 @@ decode_intra_mb:
 if (CHROMA444(h) && IS_8x8DCT(mb_type)){
 int i;
 uint8_t *nnz_cache = sl->non_zero_count_cache;
-for (i = 0; i < 2; i++){
-if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) {
-nnz_cache[3+8* 1 + 2*8*i]=
-nnz_cache[3+8* 2 + 2*8*i]=
-nnz_cache[3+8* 6 + 2*8*i]=
-nnz_cache[3+8* 7 + 2*8*i]=
-nnz_cache[3+8*11 + 2*8*i]=
-nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+if (h->x264_build < 151U) {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && 
!IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
+}
+} else {
+for (i = 0; i < 2; i++){
+if (sl->left_type[LEFT(i)] && 
!IS_8x8DCT(sl->left_type[LEFT(i)])) {
+nnz_cache[3+8* 1 + 2*8*i]=
+nnz_cache[3+8* 2 + 2*8*i]=
+nnz_cache[3+8* 6 + 2*8*i]=
+nnz_cache[3+8* 7 + 2*8*i]=
+nnz_cache[3+8*11 + 2*8*i]=
+nnz_cache[3+8*12 + 2*8*i]= 
!IS_INTRA_PCM(sl->left_type[LEFT(i)]) ? 0 : 64;
+}
+}
+if (sl->top_type && !IS_8x8DCT(sl->top_type)){
+uint32_t top_empty = !IS_INTRA_PCM(sl->top_type) ? 0 : 
0x40404040;
+AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
-}
-if (sl->top_type && !IS_8x8DCT(sl->top_type)){
-uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040;
-AV_WN32A(&nnz_cache[4+8* 0], top_empty);
-AV_WN32A(&nnz_cache[4+8* 5], top_empty);
-AV_WN32A(&nnz_cache[4+8*10], top_empty);
 }
 }
 h->cur_pic.mb_type[mb_xy] = mb_type;
-- 
2.0.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/4] h264dec: track the last seen value of x264_build

2017-07-24 Thread Anton Khirnov
Do not use the one in the SEI directly as that is reset at certain
points.

Inspired by patches from Michael Niedermayer and
Anton Mitrofanov.

CC: libav-sta...@libav.org
---
 libavcodec/h264_direct.c | 4 ++--
 libavcodec/h264_slice.c  | 6 +-
 libavcodec/h264dec.c | 1 +
 libavcodec/h264dec.h | 1 +
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index 7ec49b6..abac259 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -391,7 +391,7 @@ single_col:
  (l1ref0[0] < 0 && !l1ref1[0] &&
   FFABS(l1mv1[0][0]) <= 1 &&
   FFABS(l1mv1[0][1]) <= 1 &&
-  h->sei.unregistered.x264_build > 33U))) {
+  h->x264_build > 33U))) {
 a = b = 0;
 if (ref[0] > 0)
 a = mv[0];
@@ -426,7 +426,7 @@ single_col:
 (l1ref0[i8] == 0 ||
  (l1ref0[i8] < 0 &&
   l1ref1[i8] == 0 &&
-  h->sei.unregistered.x264_build > 33U))) {
+  h->x264_build > 33U))) {
 const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
 if (IS_SUB_8X8(sub_mb_type)) {
 const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index c9f1dbb..e7408b2 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -403,6 +403,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
 
 h->enable_er   = h1->enable_er;
 h->workaround_bugs = h1->workaround_bugs;
+h->x264_build  = h1->x264_build;
 h->droppable   = h1->droppable;
 
 // extradata/NAL handling
@@ -509,6 +510,9 @@ static int h264_frame_start(H264Context *h)
 
 h->mb_aff_frame = h->ps.sps->mb_aff && (h->picture_structure == 
PICT_FRAME);
 
+if (h->sei.unregistered.x264_build >= 0)
+h->x264_build = h->sei.unregistered.x264_build;
+
 assert(h->cur_pic_ptr->long_ref == 0);
 
 return 0;
@@ -847,7 +851,7 @@ static int h264_slice_header_init(H264Context *h)
 
 if (sps->timing_info_present_flag) {
 int64_t den = sps->time_scale;
-if (h->sei.unregistered.x264_build < 44U)
+if (h->x264_build < 44U)
 den *= 2;
 av_reduce(&h->avctx->framerate.den, &h->avctx->framerate.num,
   sps->num_units_in_tick, den, 1 << 30);
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 2a532a7..7a8293e 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -293,6 +293,7 @@ static int h264_init_context(AVCodecContext *avctx, 
H264Context *h)
 h->flags = avctx->flags;
 h->poc.prev_poc_msb  = 1 << 16;
 h->recovery_frame= -1;
+h->x264_build= -1;
 h->frame_recovered   = 0;
 
 h->next_outputed_poc = INT_MIN;
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index fc7beeb..ddfe224 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -361,6 +361,7 @@ typedef struct H264Context {
 int context_initialized;
 int flags;
 int workaround_bugs;
+int x264_build;
 /* Set when slice threading is used and at least one slice uses deblocking
  * mode 1 (i.e. across slice boundaries). Then we disable the loop filter
  * during normal MB decoding and execute it serially at the end.
-- 
2.0.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/4] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction

2017-07-24 Thread Anton Khirnov
From: Yogender Kumar Gupta 

CC: libav-sta...@libav.org

Signed-off-by: Anton Khirnov 
---
 libavcodec/h264_mb.c   |  7 +++-
 libavcodec/h264pred.c  |  2 ++
 libavcodec/h264pred.h  |  3 ++
 libavcodec/h264pred_template.c | 73 ++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index f037bd5..51d73ce 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -636,7 +636,12 @@ static av_always_inline void 
hl_decode_mb_predict_luma(const H264Context *h,
 uint8_t *const ptr = dest_y + block_offset[i];
 const int dir  = sl->intra4x4_pred_mode_cache[scan8[i]];
 if (transform_bypass && h->ps.sps->profile_idc == 244 && dir 
<= 1) {
-h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 
<< pixel_shift), linesize);
+if (h->x264_build < 151U) {
+h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 
256 << pixel_shift), linesize);
+} else
+h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 
+ p * 256 << pixel_shift),
+(sl-> 
topleft_samples_available << i) & 0x8000,
+
(sl->topright_samples_available << i) & 0x4000, linesize);
 } else {
 const int nnz = sl->non_zero_count_cache[scan8[i + p * 
16]];
 h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available 
<< i) & 0x8000,
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 7627eb0..135babc 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int 
codec_id,
 h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add  , 
depth);\
 h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add   , 
depth);\
 h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add , 
depth);\
+h->pred8x8l_filter_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_filter_add  
 , depth);\
+h->pred8x8l_filter_add [ HOR_PRED   ]= 
FUNCC(pred8x8l_horizontal_filter_add , depth);\
 if (chroma_format_idc <= 1) {\
 h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add, 
depth);\
 h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add  , 
depth);\
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 60e74349..795d8f3 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -101,6 +101,9 @@ typedef struct H264PredContext {
   int16_t *block /*align 16*/, ptrdiff_t stride);
 void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
int16_t *block /*align 16*/, ptrdiff_t stride);
+void(*pred8x8l_filter_add[2])(uint8_t *pix /*align  8*/,
+  int16_t *block /*align 16*/,
+  int topleft, int topright, ptrdiff_t stride);
 void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
   const int *block_offset,
   int16_t *block /*align 16*/, ptrdiff_t stride);
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 8492b2b..02494aa 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, 
int has_topleft,
 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
 }
+
+static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t 
*_block, int has_topleft,
+int has_topright, ptrdiff_t 
_stride)
+{
+int i;
+pixel *src = (pixel*)_src;
+const dctcoef *block = (const dctcoef*)_block;
+pixel pix[8];
+int stride = _stride/sizeof(pixel);
+PREDICT_8x8_LOAD_TOP;
+
+pix[0] = t0;
+pix[1] = t1;
+pix[2] = t2;
+pix[3] = t3;
+pix[4] = t4;
+pix[5] = t5;
+pix[6] = t6;
+pix[7] = t7;
+
+for (i = 0; i < 8; i++) {
+pixel v = pix[i];
+src[0 * stride] = v += block[0];
+src[1 * stride] = v += block[8];
+src[2 * stride] = v += block[16];
+src[3 * stride] = v += block[24];
+src[4 * stride] = v += block[32];
+src[5 * stride] = v += block[40];
+src[6 * stride] = v += block[48];
+src[7 * stride] = v +  block[56];
+src++;
+block++;
+}
+
+memset(_block, 0, sizeof(dctcoef) * 64);
+}
+
+static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t 
*_block, int has_topleft,
+  int has_topright, ptrdiff_t 
_stride)
+{
+int i;
+

Re: [libav-devel] [PATCH 06/15] lavc: Add coded bitstream read/write support for H.265

2017-07-24 Thread Anton Khirnov
Quoting Mark Thompson (2017-06-24 01:39:12)
> ---
>  libavcodec/cbs.c |1 +
>  libavcodec/cbs_h2645.c   |  410 +++-
>  libavcodec/cbs_h265.h|  544 
>  libavcodec/cbs_h265_syntax.c | 1482 
> ++
>  libavcodec/cbs_internal.h|1 +
>  5 files changed, 2435 insertions(+), 3 deletions(-)
>  create mode 100644 libavcodec/cbs_h265.h
>  create mode 100644 libavcodec/cbs_h265_syntax.c
> 

Looks okish from a quick look

-- 
Anton Khirnov
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] h264dec: add a CUVID hwaccel

2017-07-24 Thread Anton Khirnov
Quoting Luca Barbato (2017-07-24 18:21:20)
> On 24/07/2017 15:15, Anton Khirnov wrote:
> > Some parts of the code are based on a patch by
> > Timo Rothenpieler 
> > ---
> > Now with high bit depth support
> > ---
> 
> I recently updated the nvidia-video-codec distribution to the version 8,
> I guess that's needed for that, isn't it?

To get 10bit decoding yes. 8bit should still work with older versions of
the headers/drivers.

-- 
Anton Khirnov
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] hevcdec: add a CUVID hwaccel

2017-07-24 Thread Luca Barbato
On 24/07/2017 15:16, Anton Khirnov wrote:
> ---
> Now with 10bit decoding
> ---
>  Changelog   |   2 +-
>  configure   |   3 +
>  libavcodec/Makefile |   1 +
>  libavcodec/allcodecs.c  |   1 +
>  libavcodec/cuvid.c  |   1 +
>  libavcodec/cuvid_hevc.c | 280 
> 
>  libavcodec/hevcdec.c|   9 +-
>  7 files changed, 295 insertions(+), 2 deletions(-)
>  create mode 100644 libavcodec/cuvid_hevc.c
> 

Probably ok, I'll test tonight.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] h264dec: add a CUVID hwaccel

2017-07-24 Thread Luca Barbato
On 24/07/2017 15:15, Anton Khirnov wrote:
> Some parts of the code are based on a patch by
> Timo Rothenpieler 
> ---
> Now with high bit depth support
> ---

I recently updated the nvidia-video-codec distribution to the version 8,
I guess that's needed for that, isn't it?

lu

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] hevcdec: add a CUVID hwaccel

2017-07-24 Thread Anton Khirnov
---
Now with 10bit decoding
---
 Changelog   |   2 +-
 configure   |   3 +
 libavcodec/Makefile |   1 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/cuvid.c  |   1 +
 libavcodec/cuvid_hevc.c | 280 
 libavcodec/hevcdec.c|   9 +-
 7 files changed, 295 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/cuvid_hevc.c

diff --git a/Changelog b/Changelog
index f3c8f7a..59a2eba 100644
--- a/Changelog
+++ b/Changelog
@@ -17,7 +17,7 @@ version :
 - ClearVideo decoder (I-frames only)
 - support for decoding through D3D11VA in avconv
 - Cinepak encoder
-- NVIDIA CUVID-accelerated H.264 decoding
+- NVIDIA CUVID-accelerated H.264 and HEVC decoding
 
 
 version 12:
diff --git a/configure b/configure
index 0eeb46b..23e0ab6 100755
--- a/configure
+++ b/configure
@@ -2210,6 +2210,8 @@ h264_vda_old_hwaccel_deps="vda"
 h264_vda_old_hwaccel_select="h264_decoder"
 h264_vdpau_hwaccel_deps="vdpau"
 h264_vdpau_hwaccel_select="h264_decoder"
+hevc_cuvid_hwaccel_deps="cuvid CUVIDHEVCPICPARAMS"
+hevc_cuvid_hwaccel_select="hevc_decoder"
 hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
 hevc_d3d11va_hwaccel_select="hevc_decoder"
 hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
@@ -4698,6 +4700,7 @@ check_lib psapi"windows.h psapi.h"
GetProcessMemoryInfo -lpsapi
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 
 check_type "cuviddec.h" "CUVIDH264PICPARAMS"
+check_type "cuviddec.h" "CUVIDHEVCPICPARAMS"
 check_struct "cuviddec.h" "CUVIDDECODECREATEINFO" bitDepthMinus8
 
 check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" 
-DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 66f6f9e..12c8678 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -640,6 +640,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL)   += qsvdec_h2645.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o
 OBJS-$(CONFIG_H264_VDA_HWACCEL)   += vda_h264.o
 OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o
+OBJS-$(CONFIG_HEVC_CUVID_HWACCEL) += cuvid_hevc.o
 OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)   += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_QSV_HWACCEL)   += qsvdec_h2645.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 97b8810..717e18f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -78,6 +78,7 @@ void avcodec_register_all(void)
 REGISTER_HWACCEL(H264_VDA,  h264_vda);
 REGISTER_HWACCEL(H264_VDA_OLD,  h264_vda_old);
 REGISTER_HWACCEL(H264_VDPAU,h264_vdpau);
+REGISTER_HWACCEL(HEVC_CUVID,hevc_cuvid);
 REGISTER_HWACCEL(HEVC_D3D11VA,  hevc_d3d11va);
 REGISTER_HWACCEL(HEVC_D3D11VA2, hevc_d3d11va2);
 REGISTER_HWACCEL(HEVC_DXVA2,hevc_dxva2);
diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
index 69f624c..2d35e92 100644
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuvid.c
@@ -53,6 +53,7 @@ static int map_avcodec_id(enum AVCodecID id)
 {
 switch (id) {
 case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
+case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
 }
 return -1;
 }
diff --git a/libavcodec/cuvid_hevc.c b/libavcodec/cuvid_hevc.c
new file mode 100644
index 000..5de9bca
--- /dev/null
+++ b/libavcodec/cuvid_hevc.c
@@ -0,0 +1,280 @@
+/*
+ * HEVC HW decode acceleration through CUVID
+ *
+ * Copyright (c) 2017 Anton Khirnov
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+#include 
+#include 
+
+#include "avcodec.h"
+#include "cuvid.h"
+#include "decode.h"
+#include "internal.h"
+#include "hevcdec.h"
+#include "hevc_data.h"
+
+static void dpb_add(CUVIDHEVCPICPARAMS *pp, int idx, const HEVCFrame *src)
+{
+FrameDecodeData *fdd = (FrameDecodeData*)src->frame->opaque_ref->data;
+const CUVIDFrame *cf = fdd->hwaccel_priv;
+
+pp->RefPicIdx[idx]  = cf ? cf->idx : -1;
+pp->PicOrderCntVal[idx] = src->poc;
+pp->IsLongTerm[idx] = !!(src->flags & HEVC_FRAME_FLAG_LONG_REF);
+}
+
+static void fill_scaling_lists(CUVIDHEVCPICPARAMS *ppc, const HEVCContext *s)
+{
+const ScalingList *sl = 

[libav-devel] [PATCH] h264dec: add a CUVID hwaccel

2017-07-24 Thread Anton Khirnov
Some parts of the code are based on a patch by
Timo Rothenpieler 
---
Now with high bit depth support
---
 Changelog   |   1 +
 avtools/avconv.h|   1 +
 avtools/avconv_opt.c|   4 +
 configure   |  11 +-
 libavcodec/Makefile |   2 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/cuvid.c  | 425 
 libavcodec/cuvid.h  |  61 +++
 libavcodec/cuvid_h264.c | 177 
 libavcodec/h264_slice.c |   6 +-
 10 files changed, 687 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/cuvid.c
 create mode 100644 libavcodec/cuvid.h
 create mode 100644 libavcodec/cuvid_h264.c

diff --git a/Changelog b/Changelog
index adcca3f..f3c8f7a 100644
--- a/Changelog
+++ b/Changelog
@@ -17,6 +17,7 @@ version :
 - ClearVideo decoder (I-frames only)
 - support for decoding through D3D11VA in avconv
 - Cinepak encoder
+- NVIDIA CUVID-accelerated H.264 decoding
 
 
 version 12:
diff --git a/avtools/avconv.h b/avtools/avconv.h
index 4c69933..b5843fb 100644
--- a/avtools/avconv.h
+++ b/avtools/avconv.h
@@ -58,6 +58,7 @@ enum HWAccelID {
 HWACCEL_QSV,
 HWACCEL_VAAPI,
 HWACCEL_D3D11VA,
+HWACCEL_CUVID,
 };
 
 typedef struct HWAccel {
diff --git a/avtools/avconv_opt.c b/avtools/avconv_opt.c
index 575ce12..df69336 100644
--- a/avtools/avconv_opt.c
+++ b/avtools/avconv_opt.c
@@ -80,6 +80,10 @@ const HWAccel hwaccels[] = {
 { "vaapi", hwaccel_decode_init, HWACCEL_VAAPI, AV_PIX_FMT_VAAPI,
   AV_HWDEVICE_TYPE_VAAPI },
 #endif
+#if CONFIG_CUVID
+{ "cuvid", hwaccel_decode_init, HWACCEL_CUVID, AV_PIX_FMT_CUDA,
+   AV_HWDEVICE_TYPE_CUDA },
+#endif
 { 0 },
 };
 int hwaccel_lax_profile_check = 0;
diff --git a/configure b/configure
index d92ce33..0eeb46b 100755
--- a/configure
+++ b/configure
@@ -237,6 +237,7 @@ External library support:
 
   The following libraries provide various hardware acceleration features:
   --enable-cudaNvidia CUDA (dynamically linked)
+  --enable-cuvid   Nvidia CUVID video decode acceleration
   --enable-d3d11va Microsoft Direct3D 11 video acceleration [auto]
   --enable-dxva2   Microsoft DirectX 9 video acceleration [auto]
   --enable-libmfx  Intel MediaSDK (AKA Quick Sync Video)
@@ -1266,6 +1267,7 @@ EXTRALIBS_LIST="
 
 HWACCEL_LIBRARY_NONFREE_LIST="
 cuda
+cuvid
 libnpp
 "
 HWACCEL_LIBRARY_LIST="
@@ -1686,6 +1688,7 @@ TOOLCHAIN_FEATURES="
 
 TYPES_LIST="
 CONDITION_VARIABLE_Ptr
+CUVIDDECODECREATEINFO_bitDepthMinus8
 socklen_t
 struct_addrinfo
 struct_group_source_req
@@ -2189,6 +2192,8 @@ vda_extralibs="-framework CoreFoundation -framework 
VideoDecodeAcceleration -fra
 
 h263_vaapi_hwaccel_deps="vaapi"
 h263_vaapi_hwaccel_select="h263_decoder"
+h264_cuvid_hwaccel_deps="cuvid CUVIDH264PICPARAMS"
+h264_cuvid_hwaccel_select="h264_decoder"
 h264_d3d11va_hwaccel_deps="d3d11va"
 h264_d3d11va_hwaccel_select="h264_decoder"
 h264_d3d11va2_hwaccel_deps="d3d11va"
@@ -2554,7 +2559,7 @@ avdevice_extralibs="libm_extralibs"
 avformat_extralibs="libm_extralibs"
 avfilter_extralibs="pthreads_extralibs libm_extralibs"
 avresample_extralibs="libm_extralibs"
-avutil_extralibs="clock_gettime_extralibs cuda_extralibs libm_extralibs 
libmfx_extralibs nanosleep_extralibs pthreads_extralibs user32_extralibs 
vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs 
wincrypt_extralibs"
+avutil_extralibs="clock_gettime_extralibs cuda_extralibs cuvid_extralibs 
libm_extralibs libmfx_extralibs nanosleep_extralibs pthreads_extralibs 
user32_extralibs vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs 
vdpau_x11_extralibs wincrypt_extralibs"
 swscale_extralibs="libm_extralibs"
 
 # programs
@@ -4692,6 +4697,9 @@ check_lib psapi"windows.h psapi.h"
GetProcessMemoryInfo -lpsapi
 
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 
+check_type "cuviddec.h" "CUVIDH264PICPARAMS"
+check_struct "cuviddec.h" "CUVIDDECODECREATEINFO" bitDepthMinus8
+
 check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" 
-DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
 check_type "windows.h d3d11.h" "ID3D11VideoDecoder"
 check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
@@ -4751,6 +4759,7 @@ done
 enabled avisynth  && require_header avisynth/avisynth_c.h
 enabled avxsynth  && require_header avxsynth/avxsynth_c.h
 enabled cuda  && require cuda cuda.h cuInit -lcuda
+enabled cuvid && require cuvid cuviddec.h cuvidCreateDecoder 
-lnvcuvid
 enabled frei0r&& require_header frei0r.h
 enabled gnutls&& require_pkg_config gnutls gnutls gnutls/gnutls.h 
gnutls_global_init
 enabled libbs2b   && require_pkg_config libbs2b libbs2b bs2b.h 
bs2b_open
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2b91588..66f6f9e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -625,6 +625,7 @@ 

Re: [libav-devel] [PATCH 2/3] fate/hevc: specify output pixel format explicitly

2017-07-24 Thread Anton Khirnov
Quoting Hendrik Leppkes (2017-07-24 12:09:52)
> On Mon, Jul 24, 2017 at 11:46 AM, Anton Khirnov  wrote:
> > This allows running those tests with hwaccel.
> > ---
> >  tests/fate/hevc.mak | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak
> > index 5446969..fe3ef26 100644
> > --- a/tests/fate/hevc.mak
> > +++ b/tests/fate/hevc.mak
> > @@ -144,7 +144,7 @@ HEVC_SAMPLES_10BIT =\
> >
> >  define FATE_HEVC_TEST
> >  FATE_HEVC += fate-hevc-conformance-$(1)
> > -fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i 
> > $(TARGET_SAMPLES)/hevc-conformance/$(1).bit
> > +fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i 
> > $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p
> >  endef
> >
> >  define FATE_HEVC_TEST_10BIT
> 
> While you're in here, how about the 10-bit tests as well?

That's already done for 10bit, to get the same endianness everywhere.

-- 
Anton Khirnov
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/3] fate/hevc: specify output pixel format explicitly

2017-07-24 Thread Hendrik Leppkes
On Mon, Jul 24, 2017 at 11:46 AM, Anton Khirnov  wrote:
> This allows running those tests with hwaccel.
> ---
>  tests/fate/hevc.mak | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak
> index 5446969..fe3ef26 100644
> --- a/tests/fate/hevc.mak
> +++ b/tests/fate/hevc.mak
> @@ -144,7 +144,7 @@ HEVC_SAMPLES_10BIT =\
>
>  define FATE_HEVC_TEST
>  FATE_HEVC += fate-hevc-conformance-$(1)
> -fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i 
> $(TARGET_SAMPLES)/hevc-conformance/$(1).bit
> +fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i 
> $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p
>  endef
>
>  define FATE_HEVC_TEST_10BIT

While you're in here, how about the 10-bit tests as well?

- Hendrik
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 3/3] hevcdec: add a CUVID hwaccel

2017-07-24 Thread Hendrik Leppkes
On Mon, Jul 24, 2017 at 11:46 AM, Anton Khirnov  wrote:
> ---
>  Changelog   |   2 +-
>  configure   |   3 +
>  libavcodec/Makefile |   1 +
>  libavcodec/allcodecs.c  |   1 +
>  libavcodec/cuvid.c  |   1 +
>  libavcodec/cuvid_hevc.c | 280 
> 
>  libavcodec/hevcdec.c|   6 +-
>  7 files changed, 292 insertions(+), 2 deletions(-)
>  create mode 100644 libavcodec/cuvid_hevc.c
>

CUVID supports 10-bit decoding (and even 12-bit), any reason you
didn't implement that?

- Hendrik
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 1/3] hevcdec: set the active SPS before calling get_format()

2017-07-24 Thread Anton Khirnov
This way the SPS is available to the hwaccel init code.
---
 libavcodec/hevcdec.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index f6bbb70..664e4ac 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -490,13 +490,14 @@ static int hls_slice_header(HEVCContext *s)
 
 ff_hevc_clear_refs(s);
 
+ret = set_sps(s, sps, sps->pix_fmt);
+if (ret < 0)
+return ret;
+
 pix_fmt = get_format(s, sps);
 if (pix_fmt < 0)
 return pix_fmt;
-
-ret = set_sps(s, sps, pix_fmt);
-if (ret < 0)
-return ret;
+s->avctx->pix_fmt = pix_fmt;
 
 s->seq_decode = (s->seq_decode + 1) & 0xff;
 s->max_ra = INT_MAX;
-- 
2.0.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/3] fate/hevc: specify output pixel format explicitly

2017-07-24 Thread Anton Khirnov
This allows running those tests with hwaccel.
---
 tests/fate/hevc.mak | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak
index 5446969..fe3ef26 100644
--- a/tests/fate/hevc.mak
+++ b/tests/fate/hevc.mak
@@ -144,7 +144,7 @@ HEVC_SAMPLES_10BIT =\
 
 define FATE_HEVC_TEST
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit
+fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p
 endef
 
 define FATE_HEVC_TEST_10BIT
-- 
2.0.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 3/3] hevcdec: add a CUVID hwaccel

2017-07-24 Thread Anton Khirnov
---
 Changelog   |   2 +-
 configure   |   3 +
 libavcodec/Makefile |   1 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/cuvid.c  |   1 +
 libavcodec/cuvid_hevc.c | 280 
 libavcodec/hevcdec.c|   6 +-
 7 files changed, 292 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/cuvid_hevc.c

diff --git a/Changelog b/Changelog
index f3c8f7a..59a2eba 100644
--- a/Changelog
+++ b/Changelog
@@ -17,7 +17,7 @@ version :
 - ClearVideo decoder (I-frames only)
 - support for decoding through D3D11VA in avconv
 - Cinepak encoder
-- NVIDIA CUVID-accelerated H.264 decoding
+- NVIDIA CUVID-accelerated H.264 and HEVC decoding
 
 
 version 12:
diff --git a/configure b/configure
index d31403c..cf6b862 100755
--- a/configure
+++ b/configure
@@ -2209,6 +2209,8 @@ h264_vda_old_hwaccel_deps="vda"
 h264_vda_old_hwaccel_select="h264_decoder"
 h264_vdpau_hwaccel_deps="vdpau"
 h264_vdpau_hwaccel_select="h264_decoder"
+hevc_cuvid_hwaccel_deps="cuvid CUVIDHEVCPICPARAMS"
+hevc_cuvid_hwaccel_select="hevc_decoder"
 hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
 hevc_d3d11va_hwaccel_select="hevc_decoder"
 hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
@@ -4697,6 +4699,7 @@ check_lib psapi"windows.h psapi.h"
GetProcessMemoryInfo -lpsapi
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 
 check_type "cuviddec.h" "CUVIDH264PICPARAMS"
+check_type "cuviddec.h" "CUVIDHEVCPICPARAMS"
 
 check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" 
-DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
 check_type "windows.h d3d11.h" "ID3D11VideoDecoder"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 66f6f9e..12c8678 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -640,6 +640,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL)   += qsvdec_h2645.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o
 OBJS-$(CONFIG_H264_VDA_HWACCEL)   += vda_h264.o
 OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o
+OBJS-$(CONFIG_HEVC_CUVID_HWACCEL) += cuvid_hevc.o
 OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)   += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_QSV_HWACCEL)   += qsvdec_h2645.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 97b8810..717e18f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -78,6 +78,7 @@ void avcodec_register_all(void)
 REGISTER_HWACCEL(H264_VDA,  h264_vda);
 REGISTER_HWACCEL(H264_VDA_OLD,  h264_vda_old);
 REGISTER_HWACCEL(H264_VDPAU,h264_vdpau);
+REGISTER_HWACCEL(HEVC_CUVID,hevc_cuvid);
 REGISTER_HWACCEL(HEVC_D3D11VA,  hevc_d3d11va);
 REGISTER_HWACCEL(HEVC_D3D11VA2, hevc_d3d11va2);
 REGISTER_HWACCEL(HEVC_DXVA2,hevc_dxva2);
diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
index 22b966c..9abccb6 100644
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuvid.c
@@ -51,6 +51,7 @@ static int map_avcodec_id(enum AVCodecID id)
 {
 switch (id) {
 case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
+case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
 }
 return -1;
 }
diff --git a/libavcodec/cuvid_hevc.c b/libavcodec/cuvid_hevc.c
new file mode 100644
index 000..5de9bca
--- /dev/null
+++ b/libavcodec/cuvid_hevc.c
@@ -0,0 +1,280 @@
+/*
+ * HEVC HW decode acceleration through CUVID
+ *
+ * Copyright (c) 2017 Anton Khirnov
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+#include 
+#include 
+
+#include "avcodec.h"
+#include "cuvid.h"
+#include "decode.h"
+#include "internal.h"
+#include "hevcdec.h"
+#include "hevc_data.h"
+
+static void dpb_add(CUVIDHEVCPICPARAMS *pp, int idx, const HEVCFrame *src)
+{
+FrameDecodeData *fdd = (FrameDecodeData*)src->frame->opaque_ref->data;
+const CUVIDFrame *cf = fdd->hwaccel_priv;
+
+pp->RefPicIdx[idx]  = cf ? cf->idx : -1;
+pp->PicOrderCntVal[idx] = src->poc;
+pp->IsLongTerm[idx] = !!(src->flags & HEVC_FRAME_FLAG_LONG_REF);
+}
+
+static void fill_scaling_lists(CUVIDHEVCPICPARAMS *ppc, const HEVCContext *s)
+{
+const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
+