Re: [FFmpeg-devel] [PATCH] lavc/vaapi-vp9: add support for profile 2 (bpp > 8)

2016-11-29 Thread Mathieu Velten
Thanks for your feedback.

I'll split that into 2 patches and only enable it for vaapi indeed.

Mathieu

Le mar. 29 nov. 2016 à 04:11, Ronald S. Bultje  a
écrit :

> Hi,
>
> On Mon, Nov 28, 2016 at 7:26 PM, Mark Thompson  wrote:
>
> > On 28/11/16 21:22, Mathieu Velten wrote:
> > > ---
> > >  libavcodec/vaapi_vp9.c |  1 +
> > >  libavcodec/vp9.c   | 32 +---
> > >  libavcodec/vp9.h   |  1 +
> > >  3 files changed, 19 insertions(+), 15 deletions(-)
> >
> > Nice :)
> >
> > Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode).
> >
> > This should probably be split into two patches, though - one for the
> > generic vp9 hwaccel support, a second then enabling it for VAAPI.
> >
> > > diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
> > > index b360dcb..9b3e81a 100644
> > > --- a/libavcodec/vaapi_vp9.c
> > > +++ b/libavcodec/vaapi_vp9.c
> > > @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext
> >  *avctx,
> > >  pp->first_partition_size = h->h.compressed_header_size;
> > >
> > >  pp->profile = h->h.profile;
> > > +pp->bit_depth = h->h.bpp;
> > >
> > >  pp->filter_level = h->h.filter.level;
> > >  pp->sharpness_level = h->h.filter.sharpness;
> > > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> > > index 0ec895a..ff526da 100644
> > > --- a/libavcodec/vp9.c
> > > +++ b/libavcodec/vp9.c
> > > @@ -68,7 +68,7 @@ typedef struct VP9Context {
> > >  ptrdiff_t y_stride, uv_stride;
> > >
> > >  uint8_t ss_h, ss_v;
> > > -uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
> > > +uint8_t last_bpp, bpp_index, bytesperpixel;
> > >  uint8_t last_keyframe;
> > >  // sb_cols/rows, rows/cols and last_fmt are used for allocating
> all
> > internal
> > >  // arrays, and are thus per-thread. w/h and gf_fmt are synced
> > between threads
> > > @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w,
> > int h)
> > >  if ((res = ff_set_dimensions(ctx, w, h)) < 0)
> > >  return res;
> > >
> > > -if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
> > > +if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
> > > +s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
> > > +s->pix_fmt == AV_PIX_FMT_YUV420P12) {
> > >  #if CONFIG_VP9_DXVA2_HWACCEL
> > >  *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
> > >  #endif
> >
> > This is enabling it for DXVA2 and D3D11VA as well?  I'm guessing you
> > probably didn't want to do that - I think it would be better with
> something
> > more like libavcodec/hevc.c (HEAD, line 350).
>
>
> I'll let you guys figure out the details for this, but generic vp9.[ch]
> changes are OK with me.
>
> Thanks!
> Ronald
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc/vaapi-vp9: add support for profile 2 (bpp > 8)

2016-11-28 Thread Ronald S. Bultje
Hi,

On Mon, Nov 28, 2016 at 7:26 PM, Mark Thompson  wrote:

> On 28/11/16 21:22, Mathieu Velten wrote:
> > ---
> >  libavcodec/vaapi_vp9.c |  1 +
> >  libavcodec/vp9.c   | 32 +---
> >  libavcodec/vp9.h   |  1 +
> >  3 files changed, 19 insertions(+), 15 deletions(-)
>
> Nice :)
>
> Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode).
>
> This should probably be split into two patches, though - one for the
> generic vp9 hwaccel support, a second then enabling it for VAAPI.
>
> > diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
> > index b360dcb..9b3e81a 100644
> > --- a/libavcodec/vaapi_vp9.c
> > +++ b/libavcodec/vaapi_vp9.c
> > @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext
>  *avctx,
> >  pp->first_partition_size = h->h.compressed_header_size;
> >
> >  pp->profile = h->h.profile;
> > +pp->bit_depth = h->h.bpp;
> >
> >  pp->filter_level = h->h.filter.level;
> >  pp->sharpness_level = h->h.filter.sharpness;
> > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> > index 0ec895a..ff526da 100644
> > --- a/libavcodec/vp9.c
> > +++ b/libavcodec/vp9.c
> > @@ -68,7 +68,7 @@ typedef struct VP9Context {
> >  ptrdiff_t y_stride, uv_stride;
> >
> >  uint8_t ss_h, ss_v;
> > -uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
> > +uint8_t last_bpp, bpp_index, bytesperpixel;
> >  uint8_t last_keyframe;
> >  // sb_cols/rows, rows/cols and last_fmt are used for allocating all
> internal
> >  // arrays, and are thus per-thread. w/h and gf_fmt are synced
> between threads
> > @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w,
> int h)
> >  if ((res = ff_set_dimensions(ctx, w, h)) < 0)
> >  return res;
> >
> > -if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
> > +if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
> > +s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
> > +s->pix_fmt == AV_PIX_FMT_YUV420P12) {
> >  #if CONFIG_VP9_DXVA2_HWACCEL
> >  *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
> >  #endif
>
> This is enabling it for DXVA2 and D3D11VA as well?  I'm guessing you
> probably didn't want to do that - I think it would be better with something
> more like libavcodec/hevc.c (HEAD, line 350).


I'll let you guys figure out the details for this, but generic vp9.[ch]
changes are OK with me.

Thanks!
Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc/vaapi-vp9: add support for profile 2 (bpp > 8)

2016-11-28 Thread Mark Thompson
On 28/11/16 21:22, Mathieu Velten wrote:
> ---
>  libavcodec/vaapi_vp9.c |  1 +
>  libavcodec/vp9.c   | 32 +---
>  libavcodec/vp9.h   |  1 +
>  3 files changed, 19 insertions(+), 15 deletions(-)

Nice :)

Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode).

This should probably be split into two patches, though - one for the generic 
vp9 hwaccel support, a second then enabling it for VAAPI.

> diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
> index b360dcb..9b3e81a 100644
> --- a/libavcodec/vaapi_vp9.c
> +++ b/libavcodec/vaapi_vp9.c
> @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext  
>*avctx,
>  pp->first_partition_size = h->h.compressed_header_size;
>  
>  pp->profile = h->h.profile;
> +pp->bit_depth = h->h.bpp;
>  
>  pp->filter_level = h->h.filter.level;
>  pp->sharpness_level = h->h.filter.sharpness;
> diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> index 0ec895a..ff526da 100644
> --- a/libavcodec/vp9.c
> +++ b/libavcodec/vp9.c
> @@ -68,7 +68,7 @@ typedef struct VP9Context {
>  ptrdiff_t y_stride, uv_stride;
>  
>  uint8_t ss_h, ss_v;
> -uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
> +uint8_t last_bpp, bpp_index, bytesperpixel;
>  uint8_t last_keyframe;
>  // sb_cols/rows, rows/cols and last_fmt are used for allocating all 
> internal
>  // arrays, and are thus per-thread. w/h and gf_fmt are synced between 
> threads
> @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w, int h)
>  if ((res = ff_set_dimensions(ctx, w, h)) < 0)
>  return res;
>  
> -if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
> +if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
> +s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
> +s->pix_fmt == AV_PIX_FMT_YUV420P12) {
>  #if CONFIG_VP9_DXVA2_HWACCEL
>  *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
>  #endif

This is enabling it for DXVA2 and D3D11VA as well?  I'm guessing you probably 
didn't want to do that - I think it would be better with something more like 
libavcodec/hevc.c (HEAD, line 350).

> @@ -326,10 +328,10 @@ static int update_size(AVCodecContext *ctx, int w, int 
> h)
>  av_freep(&s->b_base);
>  av_freep(&s->block_base);
>  
> -if (s->bpp != s->last_bpp) {
> -ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
> -ff_videodsp_init(&s->vdsp, s->bpp);
> -s->last_bpp = s->bpp;
> +if (s->s.h.bpp != s->last_bpp) {
> +ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & 
> AV_CODEC_FLAG_BITEXACT);
> +ff_videodsp_init(&s->vdsp, s->s.h.bpp);
> +s->last_bpp = s->s.h.bpp;
>  }
>  
>  return 0;
> @@ -458,8 +460,8 @@ static int read_colorspace_details(AVCodecContext *ctx)
>  int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 
> 2:12
>  
>  s->bpp_index = bits;
> -s->bpp = 8 + bits * 2;
> -s->bytesperpixel = (7 + s->bpp) >> 3;
> +s->s.h.bpp = 8 + bits * 2;
> +s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
>  ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
>  if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
>  static const enum AVPixelFormat pix_fmt_rgb[3] = {
> @@ -571,7 +573,7 @@ static int decode_frame_header(AVCodecContext *ctx,
>  return res;
>  } else {
>  s->ss_h = s->ss_v = 1;
> -s->bpp = 8;
> +s->s.h.bpp = 8;
>  s->bpp_index = 0;
>  s->bytesperpixel = 1;
>  s->pix_fmt = AV_PIX_FMT_YUV420P;
> @@ -2278,7 +2280,7 @@ static int decode_coeffs_b_16bpp(VP9Context *s, int16_t 
> *coef, int n_coeffs,
>   const int16_t (*nb)[2], const int16_t 
> *band_counts,
>   const int16_t *qmul)
>  {
> -return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, 
> eob, p,
> +return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, 
> cnt, eob, p,
> nnz, scan, nb, band_counts, qmul);
>  }
>  
> @@ -2288,7 +2290,7 @@ static int decode_coeffs_b32_16bpp(VP9Context *s, 
> int16_t *coef, int n_coeffs,
> const int16_t (*nb)[2], const int16_t 
> *band_counts,
> const int16_t *qmul)
>  {
> -return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, 
> eob, p,
> +return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, 
> cnt, eob, p,
> nnz, scan, nb, band_counts, qmul);
>  }
>  
> @@ -2479,7 +2481,7 @@ static av_always_inline int check_intra_mode(VP9Context 
> *s, int mode, uint8_t **
>  int have_top = row > 0 || y > 0;
>  int have_left = col > s->tile_col_start || x > 0;
>  int have_right = x < w - 1;
> -int bpp = s->bpp;
> +  

[FFmpeg-devel] [PATCH] lavc/vaapi-vp9: add support for profile 2 (bpp > 8)

2016-11-28 Thread Mathieu Velten
---
 libavcodec/vaapi_vp9.c |  1 +
 libavcodec/vp9.c       | 32 +++++++++++++++++---------------
 libavcodec/vp9.h       |  1 +
 3 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
index b360dcb..9b3e81a 100644
--- a/libavcodec/vaapi_vp9.c
+++ b/libavcodec/vaapi_vp9.c
@@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext
 *avctx,
 pp->first_partition_size = h->h.compressed_header_size;
 
 pp->profile = h->h.profile;
+pp->bit_depth = h->h.bpp;
 
 pp->filter_level = h->h.filter.level;
 pp->sharpness_level = h->h.filter.sharpness;
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 0ec895a..ff526da 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -68,7 +68,7 @@ typedef struct VP9Context {
 ptrdiff_t y_stride, uv_stride;
 
 uint8_t ss_h, ss_v;
-uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
+uint8_t last_bpp, bpp_index, bytesperpixel;
 uint8_t last_keyframe;
 // sb_cols/rows, rows/cols and last_fmt are used for allocating all 
internal
 // arrays, and are thus per-thread. w/h and gf_fmt are synced between 
threads
@@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w, int h)
 if ((res = ff_set_dimensions(ctx, w, h)) < 0)
 return res;
 
-if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
+if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
+s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
+s->pix_fmt == AV_PIX_FMT_YUV420P12) {
 #if CONFIG_VP9_DXVA2_HWACCEL
 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 #endif
@@ -326,10 +328,10 @@ static int update_size(AVCodecContext *ctx, int w, int h)
 av_freep(&s->b_base);
 av_freep(&s->block_base);
 
-if (s->bpp != s->last_bpp) {
-ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
-ff_videodsp_init(&s->vdsp, s->bpp);
-s->last_bpp = s->bpp;
+if (s->s.h.bpp != s->last_bpp) {
+ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & 
AV_CODEC_FLAG_BITEXACT);
+ff_videodsp_init(&s->vdsp, s->s.h.bpp);
+s->last_bpp = s->s.h.bpp;
 }
 
 return 0;
@@ -458,8 +460,8 @@ static int read_colorspace_details(AVCodecContext *ctx)
 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 
2:12
 
 s->bpp_index = bits;
-s->bpp = 8 + bits * 2;
-s->bytesperpixel = (7 + s->bpp) >> 3;
+s->s.h.bpp = 8 + bits * 2;
+s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
 static const enum AVPixelFormat pix_fmt_rgb[3] = {
@@ -571,7 +573,7 @@ static int decode_frame_header(AVCodecContext *ctx,
 return res;
 } else {
 s->ss_h = s->ss_v = 1;
-s->bpp = 8;
+s->s.h.bpp = 8;
 s->bpp_index = 0;
 s->bytesperpixel = 1;
 s->pix_fmt = AV_PIX_FMT_YUV420P;
@@ -2278,7 +2280,7 @@ static int decode_coeffs_b_16bpp(VP9Context *s, int16_t 
*coef, int n_coeffs,
  const int16_t (*nb)[2], const int16_t 
*band_counts,
  const int16_t *qmul)
 {
-return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, 
eob, p,
+return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, 
cnt, eob, p,
nnz, scan, nb, band_counts, qmul);
 }
 
@@ -2288,7 +2290,7 @@ static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t 
*coef, int n_coeffs,
const int16_t (*nb)[2], const int16_t 
*band_counts,
const int16_t *qmul)
 {
-return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, 
eob, p,
+return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, 
cnt, eob, p,
nnz, scan, nb, band_counts, qmul);
 }
 
@@ -2479,7 +2481,7 @@ static av_always_inline int check_intra_mode(VP9Context 
*s, int mode, uint8_t **
 int have_top = row > 0 || y > 0;
 int have_left = col > s->tile_col_start || x > 0;
 int have_right = x < w - 1;
-int bpp = s->bpp;
+int bpp = s->s.h.bpp;
 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
 [VERT_PRED]= { { DC_127_PRED,  VERT_PRED },
{ DC_127_PRED,  VERT_PRED } },
@@ -3310,13 +3312,13 @@ static void decode_b(AVCodecContext *ctx, int row, int 
col,
 s->uv_stride = f->linesize[1];
 }
 if (b->intra) {
-if (s->bpp > 8) {
+if (s->s.h.bpp > 8) {
 intra_recon_16bpp(ctx, yoff, uvoff);
 } else {
 intra_recon_8bpp(ctx, yoff, uvoff);
 }
 } else {
-if (s->bpp > 8) {
+if (s->s.h.bpp > 8) {
 inter_recon_16bpp(ctx);
 } else {