[libav-devel] [PATCH 4/4] h264dec: Fix mix of lossless and lossy MBs decoding
From: Anton MitrofanovCC: libav-sta...@libav.org Signed-off-by: Anton Khirnov --- libavcodec/h264_cabac.c | 16 libavcodec/h264_cavlc.c | 16 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 5dd285c..c0b9e30 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -2371,14 +2371,6 @@ decode_intra_mb: const uint8_t *scan, *scan8x8; const uint32_t *qmul; -if(IS_INTERLACED(mb_type)){ -scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; -scan= sl->qscale ? h->field_scan : h->field_scan_q0; -}else{ -scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; -scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; -} - // decode_cabac_mb_dqp if(get_cabac_noinline( >cabac, >cabac_state[60 + (sl->last_qscale_diff != 0)])){ int val = 1; @@ -2409,6 +2401,14 @@ decode_intra_mb: }else sl->last_qscale_diff=0; +if(IS_INTERLACED(mb_type)){ +scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; +scan= sl->qscale ? h->field_scan : h->field_scan_q0; +}else{ +scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; +scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; +} + decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 0); if (CHROMA444(h)) { decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 1); diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index c11e211..d57062b 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -1093,14 +1093,6 @@ decode_intra_mb: const uint8_t *scan, *scan8x8; const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8); -if(IS_INTERLACED(mb_type)){ -scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; -scan= sl->qscale ? h->field_scan : h->field_scan_q0; -}else{ -scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; -scan= sl->qscale ? 
h->zigzag_scan : h->zigzag_scan_q0; -} - dquant= get_se_golomb(>gb); sl->qscale += dquant; @@ -1117,6 +1109,14 @@ decode_intra_mb: sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale); sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale); +if(IS_INTERLACED(mb_type)){ +scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; +scan= sl->qscale ? h->field_scan : h->field_scan_q0; +}else{ +scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; +scan= sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; +} + if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) { return -1; } -- 2.0.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 3/4] h264_cabac: Fix CABAC+8x8dct in 4:4:4
From: Anton MitrofanovUse the correct ctxIdxInc calculation for coded_block_flag. Keep old behavior for old versions of x264 for backward compatibility. CC: libav-sta...@libav.org Signed-off-by: Anton Khirnov --- libavcodec/h264_cabac.c | 47 +-- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index b28e486..5dd285c 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -2329,21 +2329,40 @@ decode_intra_mb: if (CHROMA444(h) && IS_8x8DCT(mb_type)){ int i; uint8_t *nnz_cache = sl->non_zero_count_cache; -for (i = 0; i < 2; i++){ -if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) { -nnz_cache[3+8* 1 + 2*8*i]= -nnz_cache[3+8* 2 + 2*8*i]= -nnz_cache[3+8* 6 + 2*8*i]= -nnz_cache[3+8* 7 + 2*8*i]= -nnz_cache[3+8*11 + 2*8*i]= -nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0; +if (h->x264_build < 151U) { +for (i = 0; i < 2; i++){ +if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) { +nnz_cache[3+8* 1 + 2*8*i]= +nnz_cache[3+8* 2 + 2*8*i]= +nnz_cache[3+8* 6 + 2*8*i]= +nnz_cache[3+8* 7 + 2*8*i]= +nnz_cache[3+8*11 + 2*8*i]= +nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0; +} +} +if (sl->top_type && !IS_8x8DCT(sl->top_type)){ +uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040; +AV_WN32A(_cache[4+8* 0], top_empty); +AV_WN32A(_cache[4+8* 5], top_empty); +AV_WN32A(_cache[4+8*10], top_empty); +} +} else { +for (i = 0; i < 2; i++){ +if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) { +nnz_cache[3+8* 1 + 2*8*i]= +nnz_cache[3+8* 2 + 2*8*i]= +nnz_cache[3+8* 6 + 2*8*i]= +nnz_cache[3+8* 7 + 2*8*i]= +nnz_cache[3+8*11 + 2*8*i]= +nnz_cache[3+8*12 + 2*8*i]= !IS_INTRA_PCM(sl->left_type[LEFT(i)]) ? 0 : 64; +} +} +if (sl->top_type && !IS_8x8DCT(sl->top_type)){ +uint32_t top_empty = !IS_INTRA_PCM(sl->top_type) ? 
0 : 0x40404040; +AV_WN32A(_cache[4+8* 0], top_empty); +AV_WN32A(_cache[4+8* 5], top_empty); +AV_WN32A(_cache[4+8*10], top_empty); } -} -if (sl->top_type && !IS_8x8DCT(sl->top_type)){ -uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040; -AV_WN32A(_cache[4+8* 0], top_empty); -AV_WN32A(_cache[4+8* 5], top_empty); -AV_WN32A(_cache[4+8*10], top_empty); } } h->cur_pic.mb_type[mb_xy] = mb_type; -- 2.0.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/4] h264dec: track the last seen value of x264_build
Do not use the one in the SEI directly as that is reset at certain points. Inspired by patches from Michael Niedermayerand Anton Mitrofanov . CC: libav-sta...@libav.org --- libavcodec/h264_direct.c | 4 ++-- libavcodec/h264_slice.c | 6 +- libavcodec/h264dec.c | 1 + libavcodec/h264dec.h | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c index 7ec49b6..abac259 100644 --- a/libavcodec/h264_direct.c +++ b/libavcodec/h264_direct.c @@ -391,7 +391,7 @@ single_col: (l1ref0[0] < 0 && !l1ref1[0] && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1 && - h->sei.unregistered.x264_build > 33U))) { + h->x264_build > 33U))) { a = b = 0; if (ref[0] > 0) a = mv[0]; @@ -426,7 +426,7 @@ single_col: (l1ref0[i8] == 0 || (l1ref0[i8] < 0 && l1ref1[i8] == 0 && - h->sei.unregistered.x264_build > 33U))) { + h->x264_build > 33U))) { const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1; if (IS_SUB_8X8(sub_mb_type)) { const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride]; diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index c9f1dbb..e7408b2 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -403,6 +403,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst, h->enable_er = h1->enable_er; h->workaround_bugs = h1->workaround_bugs; +h->x264_build = h1->x264_build; h->droppable = h1->droppable; // extradata/NAL handling @@ -509,6 +510,9 @@ static int h264_frame_start(H264Context *h) h->mb_aff_frame = h->ps.sps->mb_aff && (h->picture_structure == PICT_FRAME); +if (h->sei.unregistered.x264_build >= 0) +h->x264_build = h->sei.unregistered.x264_build; + assert(h->cur_pic_ptr->long_ref == 0); return 0; @@ -847,7 +851,7 @@ static int h264_slice_header_init(H264Context *h) if (sps->timing_info_present_flag) { int64_t den = sps->time_scale; -if (h->sei.unregistered.x264_build < 44U) +if (h->x264_build < 44U) den *= 2; av_reduce(>avctx->framerate.den, >avctx->framerate.num, 
sps->num_units_in_tick, den, 1 << 30); diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 2a532a7..7a8293e 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -293,6 +293,7 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h) h->flags = avctx->flags; h->poc.prev_poc_msb = 1 << 16; h->recovery_frame= -1; +h->x264_build= -1; h->frame_recovered = 0; h->next_outputed_poc = INT_MIN; diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h index fc7beeb..ddfe224 100644 --- a/libavcodec/h264dec.h +++ b/libavcodec/h264dec.h @@ -361,6 +361,7 @@ typedef struct H264Context { int context_initialized; int flags; int workaround_bugs; +int x264_build; /* Set when slice threading is used and at least one slice uses deblocking * mode 1 (i.e. across slice boundaries). Then we disable the loop filter * during normal MB decoding and execute it serially at the end. -- 2.0.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/4] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction
From: Yogender Kumar GuptaCC: libav-sta...@libav.org Signed-off-by: Anton Khirnov --- libavcodec/h264_mb.c | 7 +++- libavcodec/h264pred.c | 2 ++ libavcodec/h264pred.h | 3 ++ libavcodec/h264pred_template.c | 73 ++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index f037bd5..51d73ce 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -636,7 +636,12 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, uint8_t *const ptr = dest_y + block_offset[i]; const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { -h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); +if (h->x264_build < 151U) { +h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); +} else +h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), +(sl-> topleft_samples_available << i) & 0x8000, + (sl->topright_samples_available << i) & 0x4000, linesize); } else { const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000, diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 7627eb0..135babc 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id, h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ +h->pred8x8l_filter_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_filter_add , depth);\ +h->pred8x8l_filter_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_filter_add , depth);\ if (chroma_format_idc <= 1) {\ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add, depth);\ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ 
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index 60e74349..795d8f3 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -101,6 +101,9 @@ typedef struct H264PredContext { int16_t *block /*align 16*/, ptrdiff_t stride); void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, int16_t *block /*align 16*/, ptrdiff_t stride); +void(*pred8x8l_filter_add[2])(uint8_t *pix /*align 8*/, + int16_t *block /*align 16*/, + int topleft, int topright, ptrdiff_t stride); void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, const int *block_offset, int16_t *block /*align 16*/, ptrdiff_t stride); diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c index 8492b2b..02494aa 100644 --- a/libavcodec/h264pred_template.c +++ b/libavcodec/h264pred_template.c @@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; } + +static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, +int has_topright, ptrdiff_t _stride) +{ +int i; +pixel *src = (pixel*)_src; +const dctcoef *block = (const dctcoef*)_block; +pixel pix[8]; +int stride = _stride/sizeof(pixel); +PREDICT_8x8_LOAD_TOP; + +pix[0] = t0; +pix[1] = t1; +pix[2] = t2; +pix[3] = t3; +pix[4] = t4; +pix[5] = t5; +pix[6] = t6; +pix[7] = t7; + +for (i = 0; i < 8; i++) { +pixel v = pix[i]; +src[0 * stride] = v += block[0]; +src[1 * stride] = v += block[8]; +src[2 * stride] = v += block[16]; +src[3 * stride] = v += block[24]; +src[4 * stride] = v += block[32]; +src[5 * stride] = v += block[40]; +src[6 * stride] = v += block[48]; +src[7 * stride] = v + block[56]; +src++; +block++; +} + +memset(_block, 0, sizeof(dctcoef) * 64); +} + +static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ +int i; +
Re: [libav-devel] [PATCH 06/15] lavc: Add coded bitstream read/write support for H.265
Quoting Mark Thompson (2017-06-24 01:39:12) > --- > libavcodec/cbs.c |1 + > libavcodec/cbs_h2645.c | 410 +++- > libavcodec/cbs_h265.h| 544 > libavcodec/cbs_h265_syntax.c | 1482 > ++ > libavcodec/cbs_internal.h|1 + > 5 files changed, 2435 insertions(+), 3 deletions(-) > create mode 100644 libavcodec/cbs_h265.h > create mode 100644 libavcodec/cbs_h265_syntax.c > Looks okish from a quick look -- Anton Khirnov ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] h264dec: add a CUVID hwaccel
Quoting Luca Barbato (2017-07-24 18:21:20) > On 24/07/2017 15:15, Anton Khirnov wrote: > > Some parts of the code are based on a patch by > > Timo Rothenpieler > > --- > > Now with high bit depth support > > --- > > I recently updated the nvidia-video-codec distribution to the version 8, > I guess that's needed for that, isn't it? To get 10bit decoding, yes. 8bit should still work with older versions of the headers/drivers. -- Anton Khirnov ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] hevcdec: add a CUVID hwaccel
On 24/07/2017 15:16, Anton Khirnov wrote: > --- > Now with 10bit decoding > --- > Changelog | 2 +- > configure | 3 + > libavcodec/Makefile | 1 + > libavcodec/allcodecs.c | 1 + > libavcodec/cuvid.c | 1 + > libavcodec/cuvid_hevc.c | 280 > > libavcodec/hevcdec.c| 9 +- > 7 files changed, 295 insertions(+), 2 deletions(-) > create mode 100644 libavcodec/cuvid_hevc.c > Probably ok, I'll test tonight. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] h264dec: add a CUVID hwaccel
On 24/07/2017 15:15, Anton Khirnov wrote: > Some parts of the code are based on a patch by > Timo Rothenpieler > --- > Now with high bit depth support > --- I recently updated the nvidia-video-codec distribution to version 8, I guess that's needed for that, isn't it? lu ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] hevcdec: add a CUVID hwaccel
--- Now with 10bit decoding --- Changelog | 2 +- configure | 3 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/cuvid.c | 1 + libavcodec/cuvid_hevc.c | 280 libavcodec/hevcdec.c| 9 +- 7 files changed, 295 insertions(+), 2 deletions(-) create mode 100644 libavcodec/cuvid_hevc.c diff --git a/Changelog b/Changelog index f3c8f7a..59a2eba 100644 --- a/Changelog +++ b/Changelog @@ -17,7 +17,7 @@ version : - ClearVideo decoder (I-frames only) - support for decoding through D3D11VA in avconv - Cinepak encoder -- NVIDIA CUVID-accelerated H.264 decoding +- NVIDIA CUVID-accelerated H.264 and HEVC decoding version 12: diff --git a/configure b/configure index 0eeb46b..23e0ab6 100755 --- a/configure +++ b/configure @@ -2210,6 +2210,8 @@ h264_vda_old_hwaccel_deps="vda" h264_vda_old_hwaccel_select="h264_decoder" h264_vdpau_hwaccel_deps="vdpau" h264_vdpau_hwaccel_select="h264_decoder" +hevc_cuvid_hwaccel_deps="cuvid CUVIDHEVCPICPARAMS" +hevc_cuvid_hwaccel_select="hevc_decoder" hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" hevc_d3d11va_hwaccel_select="hevc_decoder" hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" @@ -4698,6 +4700,7 @@ check_lib psapi"windows.h psapi.h" GetProcessMemoryInfo -lpsapi check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss check_type "cuviddec.h" "CUVIDH264PICPARAMS" +check_type "cuviddec.h" "CUVIDHEVCPICPARAMS" check_struct "cuviddec.h" "CUVIDDECODECREATEINFO" bitDepthMinus8 check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0 diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 66f6f9e..12c8678 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -640,6 +640,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec_h2645.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDA_HWACCEL) += vda_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o +OBJS-$(CONFIG_HEVC_CUVID_HWACCEL) += cuvid_hevc.o 
OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec_h2645.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 97b8810..717e18f 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -78,6 +78,7 @@ void avcodec_register_all(void) REGISTER_HWACCEL(H264_VDA, h264_vda); REGISTER_HWACCEL(H264_VDA_OLD, h264_vda_old); REGISTER_HWACCEL(H264_VDPAU,h264_vdpau); +REGISTER_HWACCEL(HEVC_CUVID,hevc_cuvid); REGISTER_HWACCEL(HEVC_D3D11VA, hevc_d3d11va); REGISTER_HWACCEL(HEVC_D3D11VA2, hevc_d3d11va2); REGISTER_HWACCEL(HEVC_DXVA2,hevc_dxva2); diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c index 69f624c..2d35e92 100644 --- a/libavcodec/cuvid.c +++ b/libavcodec/cuvid.c @@ -53,6 +53,7 @@ static int map_avcodec_id(enum AVCodecID id) { switch (id) { case AV_CODEC_ID_H264: return cudaVideoCodec_H264; +case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; } return -1; } diff --git a/libavcodec/cuvid_hevc.c b/libavcodec/cuvid_hevc.c new file mode 100644 index 000..5de9bca --- /dev/null +++ b/libavcodec/cuvid_hevc.c @@ -0,0 +1,280 @@ +/* + * HEVC HW decode acceleration through CUVID + * + * Copyright (c) 2017 Anton Khirnov + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#include "avcodec.h" +#include "cuvid.h" +#include "decode.h" +#include "internal.h" +#include "hevcdec.h" +#include "hevc_data.h" + +static void dpb_add(CUVIDHEVCPICPARAMS *pp, int idx, const HEVCFrame *src) +{ +FrameDecodeData *fdd = (FrameDecodeData*)src->frame->opaque_ref->data; +const CUVIDFrame *cf = fdd->hwaccel_priv; + +pp->RefPicIdx[idx] = cf ? cf->idx : -1; +pp->PicOrderCntVal[idx] = src->poc; +pp->IsLongTerm[idx] = !!(src->flags & HEVC_FRAME_FLAG_LONG_REF); +} + +static void fill_scaling_lists(CUVIDHEVCPICPARAMS *ppc, const HEVCContext *s) +{ +const ScalingList *sl =
[libav-devel] [PATCH] h264dec: add a CUVID hwaccel
Some parts of the code are based on a patch by Timo Rothenpieler--- Now with high bit depth support --- Changelog | 1 + avtools/avconv.h| 1 + avtools/avconv_opt.c| 4 + configure | 11 +- libavcodec/Makefile | 2 + libavcodec/allcodecs.c | 1 + libavcodec/cuvid.c | 425 libavcodec/cuvid.h | 61 +++ libavcodec/cuvid_h264.c | 177 libavcodec/h264_slice.c | 6 +- 10 files changed, 687 insertions(+), 2 deletions(-) create mode 100644 libavcodec/cuvid.c create mode 100644 libavcodec/cuvid.h create mode 100644 libavcodec/cuvid_h264.c diff --git a/Changelog b/Changelog index adcca3f..f3c8f7a 100644 --- a/Changelog +++ b/Changelog @@ -17,6 +17,7 @@ version : - ClearVideo decoder (I-frames only) - support for decoding through D3D11VA in avconv - Cinepak encoder +- NVIDIA CUVID-accelerated H.264 decoding version 12: diff --git a/avtools/avconv.h b/avtools/avconv.h index 4c69933..b5843fb 100644 --- a/avtools/avconv.h +++ b/avtools/avconv.h @@ -58,6 +58,7 @@ enum HWAccelID { HWACCEL_QSV, HWACCEL_VAAPI, HWACCEL_D3D11VA, +HWACCEL_CUVID, }; typedef struct HWAccel { diff --git a/avtools/avconv_opt.c b/avtools/avconv_opt.c index 575ce12..df69336 100644 --- a/avtools/avconv_opt.c +++ b/avtools/avconv_opt.c @@ -80,6 +80,10 @@ const HWAccel hwaccels[] = { { "vaapi", hwaccel_decode_init, HWACCEL_VAAPI, AV_PIX_FMT_VAAPI, AV_HWDEVICE_TYPE_VAAPI }, #endif +#if CONFIG_CUVID +{ "cuvid", hwaccel_decode_init, HWACCEL_CUVID, AV_PIX_FMT_CUDA, + AV_HWDEVICE_TYPE_CUDA }, +#endif { 0 }, }; int hwaccel_lax_profile_check = 0; diff --git a/configure b/configure index d92ce33..0eeb46b 100755 --- a/configure +++ b/configure @@ -237,6 +237,7 @@ External library support: The following libraries provide various hardware acceleration features: --enable-cudaNvidia CUDA (dynamically linked) + --enable-cuvid Nvidia CUVID video decode acceleration --enable-d3d11va Microsoft Direct3D 11 video acceleration [auto] --enable-dxva2 Microsoft DirectX 9 video acceleration [auto] --enable-libmfx Intel MediaSDK (AKA Quick Sync 
Video) @@ -1266,6 +1267,7 @@ EXTRALIBS_LIST=" HWACCEL_LIBRARY_NONFREE_LIST=" cuda +cuvid libnpp " HWACCEL_LIBRARY_LIST=" @@ -1686,6 +1688,7 @@ TOOLCHAIN_FEATURES=" TYPES_LIST=" CONDITION_VARIABLE_Ptr +CUVIDDECODECREATEINFO_bitDepthMinus8 socklen_t struct_addrinfo struct_group_source_req @@ -2189,6 +2192,8 @@ vda_extralibs="-framework CoreFoundation -framework VideoDecodeAcceleration -fra h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" +h264_cuvid_hwaccel_deps="cuvid CUVIDH264PICPARAMS" +h264_cuvid_hwaccel_select="h264_decoder" h264_d3d11va_hwaccel_deps="d3d11va" h264_d3d11va_hwaccel_select="h264_decoder" h264_d3d11va2_hwaccel_deps="d3d11va" @@ -2554,7 +2559,7 @@ avdevice_extralibs="libm_extralibs" avformat_extralibs="libm_extralibs" avfilter_extralibs="pthreads_extralibs libm_extralibs" avresample_extralibs="libm_extralibs" -avutil_extralibs="clock_gettime_extralibs cuda_extralibs libm_extralibs libmfx_extralibs nanosleep_extralibs pthreads_extralibs user32_extralibs vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs wincrypt_extralibs" +avutil_extralibs="clock_gettime_extralibs cuda_extralibs cuvid_extralibs libm_extralibs libmfx_extralibs nanosleep_extralibs pthreads_extralibs user32_extralibs vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs wincrypt_extralibs" swscale_extralibs="libm_extralibs" # programs @@ -4692,6 +4697,9 @@ check_lib psapi"windows.h psapi.h" GetProcessMemoryInfo -lpsapi check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss +check_type "cuviddec.h" "CUVIDH264PICPARAMS" +check_struct "cuviddec.h" "CUVIDDECODECREATEINFO" bitDepthMinus8 + check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0 check_type "windows.h d3d11.h" "ID3D11VideoDecoder" check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602 @@ -4751,6 +4759,7 @@ done enabled avisynth && require_header 
avisynth/avisynth_c.h enabled avxsynth && require_header avxsynth/avxsynth_c.h enabled cuda && require cuda cuda.h cuInit -lcuda +enabled cuvid && require cuvid cuviddec.h cuvidCreateDecoder -lnvcuvid enabled frei0r&& require_header frei0r.h enabled gnutls&& require_pkg_config gnutls gnutls gnutls/gnutls.h gnutls_global_init enabled libbs2b && require_pkg_config libbs2b libbs2b bs2b.h bs2b_open diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2b91588..66f6f9e 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -625,6 +625,7 @@
Re: [libav-devel] [PATCH 2/3] fate/hevc: specify output pixel format explicitly
Quoting Hendrik Leppkes (2017-07-24 12:09:52) > On Mon, Jul 24, 2017 at 11:46 AM, Anton Khirnov wrote: > > This allows running those tests with hwaccel. > > --- > > tests/fate/hevc.mak | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak > > index 5446969..fe3ef26 100644 > > --- a/tests/fate/hevc.mak > > +++ b/tests/fate/hevc.mak > > @@ -144,7 +144,7 @@ HEVC_SAMPLES_10BIT =\ > > > > define FATE_HEVC_TEST > > FATE_HEVC += fate-hevc-conformance-$(1) > > -fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i > > $(TARGET_SAMPLES)/hevc-conformance/$(1).bit > > +fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i > > $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p > > endef > > > > define FATE_HEVC_TEST_10BIT > > While you're in here, how about the 10-bit tests as well? That's already done for 10bit, to get the same endianness everywhere. -- Anton Khirnov ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/3] fate/hevc: specify output pixel format explicitly
On Mon, Jul 24, 2017 at 11:46 AM, Anton Khirnov wrote: > This allows running those tests with hwaccel. > --- > tests/fate/hevc.mak | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak > index 5446969..fe3ef26 100644 > --- a/tests/fate/hevc.mak > +++ b/tests/fate/hevc.mak > @@ -144,7 +144,7 @@ HEVC_SAMPLES_10BIT =\ > > define FATE_HEVC_TEST > FATE_HEVC += fate-hevc-conformance-$(1) > -fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i > $(TARGET_SAMPLES)/hevc-conformance/$(1).bit > +fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i > $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p > endef > > define FATE_HEVC_TEST_10BIT While you're in here, how about the 10-bit tests as well? - Hendrik ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/3] hevcdec: add a CUVID hwaccel
On Mon, Jul 24, 2017 at 11:46 AM, Anton Khirnov wrote: > --- > Changelog | 2 +- > configure | 3 + > libavcodec/Makefile | 1 + > libavcodec/allcodecs.c | 1 + > libavcodec/cuvid.c | 1 + > libavcodec/cuvid_hevc.c | 280 > > libavcodec/hevcdec.c| 6 +- > 7 files changed, 292 insertions(+), 2 deletions(-) > create mode 100644 libavcodec/cuvid_hevc.c > CUVID supports 10-bit decoding (and even 12-bit), any reason you didn't implement that? - Hendrik ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/3] hevcdec: set the active SPS before calling get_format()
This way the SPS is available to the hwaccel init code. --- libavcodec/hevcdec.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index f6bbb70..664e4ac 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -490,13 +490,14 @@ static int hls_slice_header(HEVCContext *s) ff_hevc_clear_refs(s); +ret = set_sps(s, sps, sps->pix_fmt); +if (ret < 0) +return ret; + pix_fmt = get_format(s, sps); if (pix_fmt < 0) return pix_fmt; - -ret = set_sps(s, sps, pix_fmt); -if (ret < 0) -return ret; +s->avctx->pix_fmt = pix_fmt; s->seq_decode = (s->seq_decode + 1) & 0xff; s->max_ra = INT_MAX; -- 2.0.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/3] fate/hevc: specify output pixel format explicitly
This allows running those tests with hwaccel. --- tests/fate/hevc.mak | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak index 5446969..fe3ef26 100644 --- a/tests/fate/hevc.mak +++ b/tests/fate/hevc.mak @@ -144,7 +144,7 @@ HEVC_SAMPLES_10BIT =\ define FATE_HEVC_TEST FATE_HEVC += fate-hevc-conformance-$(1) -fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit +fate-hevc-conformance-$(1): CMD = framecrc -vsync 0 -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p endef define FATE_HEVC_TEST_10BIT -- 2.0.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 3/3] hevcdec: add a CUVID hwaccel
--- Changelog | 2 +- configure | 3 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/cuvid.c | 1 + libavcodec/cuvid_hevc.c | 280 libavcodec/hevcdec.c| 6 +- 7 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 libavcodec/cuvid_hevc.c diff --git a/Changelog b/Changelog index f3c8f7a..59a2eba 100644 --- a/Changelog +++ b/Changelog @@ -17,7 +17,7 @@ version : - ClearVideo decoder (I-frames only) - support for decoding through D3D11VA in avconv - Cinepak encoder -- NVIDIA CUVID-accelerated H.264 decoding +- NVIDIA CUVID-accelerated H.264 and HEVC decoding version 12: diff --git a/configure b/configure index d31403c..cf6b862 100755 --- a/configure +++ b/configure @@ -2209,6 +2209,8 @@ h264_vda_old_hwaccel_deps="vda" h264_vda_old_hwaccel_select="h264_decoder" h264_vdpau_hwaccel_deps="vdpau" h264_vdpau_hwaccel_select="h264_decoder" +hevc_cuvid_hwaccel_deps="cuvid CUVIDHEVCPICPARAMS" +hevc_cuvid_hwaccel_select="hevc_decoder" hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" hevc_d3d11va_hwaccel_select="hevc_decoder" hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" @@ -4697,6 +4699,7 @@ check_lib psapi"windows.h psapi.h" GetProcessMemoryInfo -lpsapi check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss check_type "cuviddec.h" "CUVIDH264PICPARAMS" +check_type "cuviddec.h" "CUVIDHEVCPICPARAMS" check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0 check_type "windows.h d3d11.h" "ID3D11VideoDecoder" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 66f6f9e..12c8678 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -640,6 +640,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec_h2645.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDA_HWACCEL) += vda_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o +OBJS-$(CONFIG_HEVC_CUVID_HWACCEL) += cuvid_hevc.o OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o 
OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec_h2645.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 97b8810..717e18f 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -78,6 +78,7 @@ void avcodec_register_all(void) REGISTER_HWACCEL(H264_VDA, h264_vda); REGISTER_HWACCEL(H264_VDA_OLD, h264_vda_old); REGISTER_HWACCEL(H264_VDPAU,h264_vdpau); +REGISTER_HWACCEL(HEVC_CUVID,hevc_cuvid); REGISTER_HWACCEL(HEVC_D3D11VA, hevc_d3d11va); REGISTER_HWACCEL(HEVC_D3D11VA2, hevc_d3d11va2); REGISTER_HWACCEL(HEVC_DXVA2,hevc_dxva2); diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c index 22b966c..9abccb6 100644 --- a/libavcodec/cuvid.c +++ b/libavcodec/cuvid.c @@ -51,6 +51,7 @@ static int map_avcodec_id(enum AVCodecID id) { switch (id) { case AV_CODEC_ID_H264: return cudaVideoCodec_H264; +case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; } return -1; } diff --git a/libavcodec/cuvid_hevc.c b/libavcodec/cuvid_hevc.c new file mode 100644 index 000..5de9bca --- /dev/null +++ b/libavcodec/cuvid_hevc.c @@ -0,0 +1,280 @@ +/* + * HEVC HW decode acceleration through CUVID + * + * Copyright (c) 2017 Anton Khirnov + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#include "avcodec.h" +#include "cuvid.h" +#include "decode.h" +#include "internal.h" +#include "hevcdec.h" +#include "hevc_data.h" + +static void dpb_add(CUVIDHEVCPICPARAMS *pp, int idx, const HEVCFrame *src) +{ +FrameDecodeData *fdd = (FrameDecodeData*)src->frame->opaque_ref->data; +const CUVIDFrame *cf = fdd->hwaccel_priv; + +pp->RefPicIdx[idx] = cf ? cf->idx : -1; +pp->PicOrderCntVal[idx] = src->poc; +pp->IsLongTerm[idx] = !!(src->flags & HEVC_FRAME_FLAG_LONG_REF); +} + +static void fill_scaling_lists(CUVIDHEVCPICPARAMS *ppc, const HEVCContext *s) +{ +const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ? +