[libav-commits] hwcontext_cuda: Add P010 and YUV444P16 pixel format
Module: libav Branch: master Commit: 340f12f71207513672b5165d810cb6c8622c6b21 Author:Yogender Kumar Gupta <yogender.gu...@gmail.com> Committer: Anton Khirnov <an...@khirnov.net> Date: Mon Sep 19 20:19:10 2016 +0530 hwcontext_cuda: Add P010 and YUV444P16 pixel format Signed-off-by: Anton Khirnov <an...@khirnov.net> --- libavutil/hwcontext_cuda.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index b8781ce..2607834 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -32,7 +32,9 @@ typedef struct CUDAFramesContext { static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, +AV_PIX_FMT_P010, AV_PIX_FMT_YUV444P, +AV_PIX_FMT_YUV444P16, }; static void cuda_buffer_free(void *opaque, uint8_t *data) @@ -105,9 +107,15 @@ static int cuda_frames_init(AVHWFramesContext *ctx) case AV_PIX_FMT_YUV420P: size = ctx->width * ctx->height * 3 / 2; break; +case AV_PIX_FMT_P010: +size = ctx->width * ctx->height * 3; +break; case AV_PIX_FMT_YUV444P: size = ctx->width * ctx->height * 3; break; +case AV_PIX_FMT_YUV444P16: +size = ctx->width * ctx->height * 6; +break; } ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL); @@ -139,6 +147,12 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) frame->linesize[1] = ctx->width / 2; frame->linesize[2] = ctx->width / 2; break; +case AV_PIX_FMT_P010: +frame->data[0] = frame->buf[0]->data; +frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height; +frame->linesize[0] = 2 * ctx->width; +frame->linesize[1] = 2 * ctx->width; +break; case AV_PIX_FMT_YUV444P: frame->data[0] = frame->buf[0]->data; frame->data[1] = frame->data[0] + ctx->width * ctx->height; @@ -147,6 +161,14 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) frame->linesize[1] = ctx->width; frame->linesize[2] = ctx->width; break; +case AV_PIX_FMT_YUV444P16: +frame->data[0] = frame->buf[0]->data; +frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height; +frame->data[2] = frame->data[1] + 2 * ctx->width * ctx->height; +frame->linesize[0] = 2 * ctx->width; +frame->linesize[1] = 2 * ctx->width; +frame->linesize[2] = 2 * ctx->width; +break; default: av_frame_unref(frame); return AVERROR_BUG; ___ libav-commits mailing list libav-commits@libav.org https://lists.libav.org/mailman/listinfo/libav-commits
[libav-commits] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction
Module: libav Branch: release/12 Commit: 55ff67c45a6c778b7002f224d2018d98870b2510 Author:Yogender Kumar Gupta <yogender.gu...@gmail.com> Committer: Sean McGovern <gsean...@gmail.com> Date: Thu Jun 15 18:56:13 2017 -0400 h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction CC: libav-sta...@libav.org Signed-off-by: Anton Khirnov <an...@khirnov.net> (cherry picked from commit 79c6477c2abd8cfa41eef0c4ac39779dd8a9ec8e) Signed-off-by: Sean McGovern <gsean...@gmail.com> --- libavcodec/h264_mb.c | 7 +++- libavcodec/h264pred.c | 2 ++ libavcodec/h264pred.h | 3 ++ libavcodec/h264pred_template.c | 73 ++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index f037bd5..51d73ce 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -636,7 +636,12 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, uint8_t *const ptr = dest_y + block_offset[i]; const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { -h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); +if (h->x264_build < 151U) { +h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); +} else +h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), +(sl-> topleft_samples_available << i) & 0x8000, + (sl->topright_samples_available << i) & 0x4000, linesize); } else { const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000, diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 7627eb0..135babc 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id, h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ +h->pred8x8l_filter_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_filter_add , depth);\ +h->pred8x8l_filter_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_filter_add , depth);\ if (chroma_format_idc <= 1) {\ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add, depth);\ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index 60e7434..795d8f3 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -101,6 +101,9 @@ typedef struct H264PredContext { int16_t *block /*align 16*/, ptrdiff_t stride); void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, int16_t *block /*align 16*/, ptrdiff_t stride); +void(*pred8x8l_filter_add[2])(uint8_t *pix /*align 8*/, + int16_t *block /*align 16*/, + int topleft, int topright, ptrdiff_t stride); void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, const int *block_offset, int16_t *block /*align 16*/, ptrdiff_t stride); diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c index 8492b2b..02494aa 100644 --- a/libavcodec/h264pred_template.c +++ b/libavcodec/h264pred_template.c @@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; } + +static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, +int has_topright, ptrdiff_t _stride) +{ +int i; +pixel *src = (pixel*)_src; +const dctcoef *block = (const dctcoef*)_block; +pixel pix[8]; +int stride = _stride/sizeof(pixel); +PREDICT_8x8_LOAD_TOP; + +pix[0] = t0; +pix[1] = t1; +pix[2] = t2; +pix[3] = t3; +pix[4] = t4; +pix[5] = t5; +pix[6] = t6; +pix[7] = t7; + +for (i = 0; i < 8; i++) { +pixel v = pix[i]; +src[0 * stride] = v += block[0]; +src[1 * stride] = v += block[8]; +src[2 * str
[libav-commits] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction
Module: libav Branch: master Commit: 79c6477c2abd8cfa41eef0c4ac39779dd8a9ec8e Author:Yogender Kumar Gupta <yogender.gu...@gmail.com> Committer: Anton Khirnov <an...@khirnov.net> Date: Thu Jun 15 18:56:13 2017 -0400 h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction CC: libav-sta...@libav.org Signed-off-by: Anton Khirnov <an...@khirnov.net> --- libavcodec/h264_mb.c | 7 +++- libavcodec/h264pred.c | 2 ++ libavcodec/h264pred.h | 3 ++ libavcodec/h264pred_template.c | 73 ++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index f037bd5..51d73ce 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -636,7 +636,12 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, uint8_t *const ptr = dest_y + block_offset[i]; const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { -h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); +if (h->x264_build < 151U) { +h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); +} else +h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), +(sl-> topleft_samples_available << i) & 0x8000, + (sl->topright_samples_available << i) & 0x4000, linesize); } else { const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000, diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 7627eb0..135babc 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id, h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ +h->pred8x8l_filter_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_filter_add , depth);\ +h->pred8x8l_filter_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_filter_add , depth);\ if (chroma_format_idc <= 1) {\ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add, depth);\ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index 60e7434..795d8f3 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -101,6 +101,9 @@ typedef struct H264PredContext { int16_t *block /*align 16*/, ptrdiff_t stride); void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, int16_t *block /*align 16*/, ptrdiff_t stride); +void(*pred8x8l_filter_add[2])(uint8_t *pix /*align 8*/, + int16_t *block /*align 16*/, + int topleft, int topright, ptrdiff_t stride); void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, const int *block_offset, int16_t *block /*align 16*/, ptrdiff_t stride); diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c index 8492b2b..02494aa 100644 --- a/libavcodec/h264pred_template.c +++ b/libavcodec/h264pred_template.c @@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; } + +static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, +int has_topright, ptrdiff_t _stride) +{ +int i; +pixel *src = (pixel*)_src; +const dctcoef *block = (const dctcoef*)_block; +pixel pix[8]; +int stride = _stride/sizeof(pixel); +PREDICT_8x8_LOAD_TOP; + +pix[0] = t0; +pix[1] = t1; +pix[2] = t2; +pix[3] = t3; +pix[4] = t4; +pix[5] = t5; +pix[6] = t6; +pix[7] = t7; + +for (i = 0; i < 8; i++) { +pixel v = pix[i]; +src[0 * stride] = v += block[0]; +src[1 * stride] = v += block[8]; +src[2 * stride] = v += block[16]; +src[3 * stride] = v += block[24]; +src[4 * stride] = v += block[32]; +src[5 * stride] = v +