Re: [FFmpeg-devel] [PATCH v1 4/4] vaapi_encode_h265: Query encoding block sizes and features

2022-02-20 Thread Wang, Fei W
On Fri, 2022-02-18 at 04:38 +, Xiang, Haihao wrote:
> On Fri, 2022-02-18 at 09:43 +0800, Fei Wang wrote:
> > From: Mark Thompson 
> > 
> > Signed-off-by: Fei Wang 
> > ---
> >  libavcodec/vaapi_encode_h265.c | 107
> > +++--
> >  1 file changed, 102 insertions(+), 5 deletions(-)
> > 
> > diff --git a/libavcodec/vaapi_encode_h265.c
> > b/libavcodec/vaapi_encode_h265.c
> > index 8319848e4a..e98502503d 100644
> > --- a/libavcodec/vaapi_encode_h265.c
> > +++ b/libavcodec/vaapi_encode_h265.c
> > @@ -56,6 +56,9 @@ typedef struct VAAPIEncodeH265Context {
> >  VAAPIEncodeContext common;
> >  
> >  // Encoder features.
> > +uint32_t va_features;
> > +// Block size info.
> > +uint32_t va_bs;
> >  uint32_t ctu_size;
> >  uint32_t min_cb_size;
> >  
> > @@ -427,9 +430,9 @@ static int
> > vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
> >  vps->vps_max_latency_increase_plus1[i];
> >  }
> >  
> > -// These have to come from the capabilities of the
> > encoder.  We have no
> > -// way to query them, so just hardcode parameters which work
> > on the Intel
> > -// driver.
> > +// These values come from the capabilities of the first
> > encoder
> > +// implementation in the i965 driver on Intel Skylake.  They
> > may
> > +// fail badly with other platforms or drivers.
> >  // CTB size from 8x8 to 32x32.
> >  sps->log2_min_luma_coding_block_size_minus3   = 0;
> >  sps->log2_diff_max_min_luma_coding_block_size = 2;
> > @@ -447,6 +450,42 @@ static int
> > vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
> >  
> >  sps->pcm_enabled_flag = 0;
> >  
> > +// update sps setting according to queried result
> > +#if VA_CHECK_VERSION(1, 13, 0)
> > +if (priv->va_features) {
> > +VAConfigAttribValEncHEVCFeatures features = { .value =
> > priv-
> > > va_features };
> > +
> > +// Enable feature if get queried result is
> > VA_FEATURE_SUPPORTED |
> > VA_FEATURE_REQUIRED
> > +sps->amp_enabled_flag =
> > +!!features.bits.amp;
> > +sps->sample_adaptive_offset_enabled_flag =
> > +!!features.bits.sao;
> > +sps->sps_temporal_mvp_enabled_flag =
> > +!!features.bits.temporal_mvp;
> > +sps->pcm_enabled_flag =
> > +!!features.bits.pcm;
> > +}
> > +
> > +if (priv->va_bs) {
> > +VAConfigAttribValEncHEVCBlockSizes bs = { .value = priv-
> > >va_bs };
> > +sps->log2_min_luma_coding_block_size_minus3 =
> > +ff_ctz(priv->min_cb_size) - 3;
> > +sps->log2_diff_max_min_luma_coding_block_size =
> > +ff_ctz(priv->ctu_size) - ff_ctz(priv->min_cb_size);
> > +
> > +sps->log2_min_luma_transform_block_size_minus2 =
> > +bs.bits.log2_min_luma_transform_block_size_minus2;
> > +sps->log2_diff_max_min_luma_transform_block_size =
> > +bs.bits.log2_max_luma_transform_block_size_minus2 -
> > +bs.bits.log2_min_luma_transform_block_size_minus2;
> > +
> > +sps->max_transform_hierarchy_depth_inter =
> > +bs.bits.max_max_transform_hierarchy_depth_inter;
> > +sps->max_transform_hierarchy_depth_intra =
> > +bs.bits.max_max_transform_hierarchy_depth_intra;
> > +}
> > +#endif
> > +
> >  // STRPSs should ideally be here rather than defined
> > individually in
> >  // each slice, but the structure isn't completely fixed so for
> > now
> >  // don't bother.
> > @@ -539,6 +578,23 @@ static int
> > vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
> >  pps->cu_qp_delta_enabled_flag = (ctx->va_rc_mode !=
> > VA_RC_CQP);
> >  pps->diff_cu_qp_delta_depth   = 0;
> >  
> > +// update pps setting according to queried result
> > +#if VA_CHECK_VERSION(1, 13, 0)
> > +if (priv->va_features) {
> > +VAConfigAttribValEncHEVCFeatures features = { .value =
> > priv-
> > > va_features };
> > +if (ctx->va_rc_mode != VA_RC_CQP)
> > +pps->cu_qp_delta_enabled_flag =
> > +!!features.bits.cu_qp_delta;
> 
> Please fix the indentation 

Thanks Haihao to help review. Fixed in V2.

Thanks
Fei

> 
> > +
> > +pps->transform_skip_enabled_flag =
> > +!!features.bits.transform_skip;
> > +// set diff_cu_qp_delta_depth as its max value if
> > cu_qp_delta
> > enabled. Otherwise
> > +// 0 will make cu_qp_delta invalid.
> > +if (pps->cu_qp_delta_enabled_flag)
> > +pps->diff_cu_qp_delta_depth = sps-
> > > log2_diff_max_min_luma_coding_block_size;
> > +}
> > +#endif
> > +
> >  if (ctx->tile_rows && ctx->tile_cols) {
> >  int uniform_spacing;
> >  
> > @@ -640,8 +696,8 @@ static int
> > vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
> >  
> >  .coded_buf = VA_INVALID_ID,
> >  
> > -.collocated_ref_pic_index = 0xff,
> > -
> > +.collocated_ref_pic_index = 

Re: [FFmpeg-devel] [PATCH v1 4/4] vaapi_encode_h265: Query encoding block sizes and features

2022-02-17 Thread Xiang, Haihao
On Fri, 2022-02-18 at 09:43 +0800, Fei Wang wrote:
> From: Mark Thompson 
> 
> Signed-off-by: Fei Wang 
> ---
>  libavcodec/vaapi_encode_h265.c | 107 +++--
>  1 file changed, 102 insertions(+), 5 deletions(-)
> 
> diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
> index 8319848e4a..e98502503d 100644
> --- a/libavcodec/vaapi_encode_h265.c
> +++ b/libavcodec/vaapi_encode_h265.c
> @@ -56,6 +56,9 @@ typedef struct VAAPIEncodeH265Context {
>  VAAPIEncodeContext common;
>  
>  // Encoder features.
> +uint32_t va_features;
> +// Block size info.
> +uint32_t va_bs;
>  uint32_t ctu_size;
>  uint32_t min_cb_size;
>  
> @@ -427,9 +430,9 @@ static int
> vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
>  vps->vps_max_latency_increase_plus1[i];
>  }
>  
> -// These have to come from the capabilities of the encoder.  We have no
> -// way to query them, so just hardcode parameters which work on the Intel
> -// driver.
> +// These values come from the capabilities of the first encoder
> +// implementation in the i965 driver on Intel Skylake.  They may
> +// fail badly with other platforms or drivers.
>  // CTB size from 8x8 to 32x32.
>  sps->log2_min_luma_coding_block_size_minus3   = 0;
>  sps->log2_diff_max_min_luma_coding_block_size = 2;
> @@ -447,6 +450,42 @@ static int
> vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
>  
>  sps->pcm_enabled_flag = 0;
>  
> +// update sps setting according to queried result
> +#if VA_CHECK_VERSION(1, 13, 0)
> +if (priv->va_features) {
> +VAConfigAttribValEncHEVCFeatures features = { .value = priv-
> >va_features };
> +
> +// Enable feature if get queried result is VA_FEATURE_SUPPORTED |
> VA_FEATURE_REQUIRED
> +sps->amp_enabled_flag =
> +!!features.bits.amp;
> +sps->sample_adaptive_offset_enabled_flag =
> +!!features.bits.sao;
> +sps->sps_temporal_mvp_enabled_flag =
> +!!features.bits.temporal_mvp;
> +sps->pcm_enabled_flag =
> +!!features.bits.pcm;
> +}
> +
> +if (priv->va_bs) {
> +VAConfigAttribValEncHEVCBlockSizes bs = { .value = priv->va_bs };
> +sps->log2_min_luma_coding_block_size_minus3 =
> +ff_ctz(priv->min_cb_size) - 3;
> +sps->log2_diff_max_min_luma_coding_block_size =
> +ff_ctz(priv->ctu_size) - ff_ctz(priv->min_cb_size);
> +
> +sps->log2_min_luma_transform_block_size_minus2 =
> +bs.bits.log2_min_luma_transform_block_size_minus2;
> +sps->log2_diff_max_min_luma_transform_block_size =
> +bs.bits.log2_max_luma_transform_block_size_minus2 -
> +bs.bits.log2_min_luma_transform_block_size_minus2;
> +
> +sps->max_transform_hierarchy_depth_inter =
> +bs.bits.max_max_transform_hierarchy_depth_inter;
> +sps->max_transform_hierarchy_depth_intra =
> +bs.bits.max_max_transform_hierarchy_depth_intra;
> +}
> +#endif
> +
>  // STRPSs should ideally be here rather than defined individually in
>  // each slice, but the structure isn't completely fixed so for now
>  // don't bother.
> @@ -539,6 +578,23 @@ static int
> vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
>  pps->cu_qp_delta_enabled_flag = (ctx->va_rc_mode != VA_RC_CQP);
>  pps->diff_cu_qp_delta_depth   = 0;
>  
> +// update pps setting according to queried result
> +#if VA_CHECK_VERSION(1, 13, 0)
> +if (priv->va_features) {
> +VAConfigAttribValEncHEVCFeatures features = { .value = priv-
> >va_features };
> +if (ctx->va_rc_mode != VA_RC_CQP)
> +pps->cu_qp_delta_enabled_flag =
> +!!features.bits.cu_qp_delta;

Please fix the indentation 

> +
> +pps->transform_skip_enabled_flag =
> +!!features.bits.transform_skip;
> +// set diff_cu_qp_delta_depth as its max value if cu_qp_delta
> enabled. Otherwise
> +// 0 will make cu_qp_delta invalid.
> +if (pps->cu_qp_delta_enabled_flag)
> +pps->diff_cu_qp_delta_depth = sps-
> >log2_diff_max_min_luma_coding_block_size;
> +}
> +#endif
> +
>  if (ctx->tile_rows && ctx->tile_cols) {
>  int uniform_spacing;
>  
> @@ -640,8 +696,8 @@ static int
> vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
>  
>  .coded_buf = VA_INVALID_ID,
>  
> -.collocated_ref_pic_index = 0xff,
> -
> +.collocated_ref_pic_index = sps->sps_temporal_mvp_enabled_flag ?
> +0 : 0xff,
>  .last_picture = 0,
>  
>  .pic_init_qp= pps->init_qp_minus26 + 26,
> @@ -674,6 +730,8 @@ static int
> vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
>  .entropy_coding_sync_enabled_flag = pps-
> >entropy_coding_sync_enabled_flag,
>  

[FFmpeg-devel] [PATCH v1 4/4] vaapi_encode_h265: Query encoding block sizes and features

2022-02-17 Thread Fei Wang
From: Mark Thompson 

Signed-off-by: Fei Wang 
---
 libavcodec/vaapi_encode_h265.c | 107 +++--
 1 file changed, 102 insertions(+), 5 deletions(-)

diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index 8319848e4a..e98502503d 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -56,6 +56,9 @@ typedef struct VAAPIEncodeH265Context {
 VAAPIEncodeContext common;
 
 // Encoder features.
+uint32_t va_features;
+// Block size info.
+uint32_t va_bs;
 uint32_t ctu_size;
 uint32_t min_cb_size;
 
@@ -427,9 +430,9 @@ static int 
vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 vps->vps_max_latency_increase_plus1[i];
 }
 
-// These have to come from the capabilities of the encoder.  We have no
-// way to query them, so just hardcode parameters which work on the Intel
-// driver.
+// These values come from the capabilities of the first encoder
+// implementation in the i965 driver on Intel Skylake.  They may
+// fail badly with other platforms or drivers.
 // CTB size from 8x8 to 32x32.
 sps->log2_min_luma_coding_block_size_minus3   = 0;
 sps->log2_diff_max_min_luma_coding_block_size = 2;
@@ -447,6 +450,42 @@ static int 
vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 
 sps->pcm_enabled_flag = 0;
 
+// update sps setting according to queried result
+#if VA_CHECK_VERSION(1, 13, 0)
+if (priv->va_features) {
+VAConfigAttribValEncHEVCFeatures features = { .value = 
priv->va_features };
+
+// Enable feature if get queried result is VA_FEATURE_SUPPORTED | 
VA_FEATURE_REQUIRED
+sps->amp_enabled_flag =
+!!features.bits.amp;
+sps->sample_adaptive_offset_enabled_flag =
+!!features.bits.sao;
+sps->sps_temporal_mvp_enabled_flag =
+!!features.bits.temporal_mvp;
+sps->pcm_enabled_flag =
+!!features.bits.pcm;
+}
+
+if (priv->va_bs) {
+VAConfigAttribValEncHEVCBlockSizes bs = { .value = priv->va_bs };
+sps->log2_min_luma_coding_block_size_minus3 =
+ff_ctz(priv->min_cb_size) - 3;
+sps->log2_diff_max_min_luma_coding_block_size =
+ff_ctz(priv->ctu_size) - ff_ctz(priv->min_cb_size);
+
+sps->log2_min_luma_transform_block_size_minus2 =
+bs.bits.log2_min_luma_transform_block_size_minus2;
+sps->log2_diff_max_min_luma_transform_block_size =
+bs.bits.log2_max_luma_transform_block_size_minus2 -
+bs.bits.log2_min_luma_transform_block_size_minus2;
+
+sps->max_transform_hierarchy_depth_inter =
+bs.bits.max_max_transform_hierarchy_depth_inter;
+sps->max_transform_hierarchy_depth_intra =
+bs.bits.max_max_transform_hierarchy_depth_intra;
+}
+#endif
+
 // STRPSs should ideally be here rather than defined individually in
 // each slice, but the structure isn't completely fixed so for now
 // don't bother.
@@ -539,6 +578,23 @@ static int 
vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 pps->cu_qp_delta_enabled_flag = (ctx->va_rc_mode != VA_RC_CQP);
 pps->diff_cu_qp_delta_depth   = 0;
 
+// update pps setting according to queried result
+#if VA_CHECK_VERSION(1, 13, 0)
+if (priv->va_features) {
+VAConfigAttribValEncHEVCFeatures features = { .value = 
priv->va_features };
+if (ctx->va_rc_mode != VA_RC_CQP)
+pps->cu_qp_delta_enabled_flag =
+!!features.bits.cu_qp_delta;
+
+pps->transform_skip_enabled_flag =
+!!features.bits.transform_skip;
+// set diff_cu_qp_delta_depth as its max value if cu_qp_delta enabled. 
Otherwise
+// 0 will make cu_qp_delta invalid.
+if (pps->cu_qp_delta_enabled_flag)
+pps->diff_cu_qp_delta_depth = 
sps->log2_diff_max_min_luma_coding_block_size;
+}
+#endif
+
 if (ctx->tile_rows && ctx->tile_cols) {
 int uniform_spacing;
 
@@ -640,8 +696,8 @@ static int 
vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 
 .coded_buf = VA_INVALID_ID,
 
-.collocated_ref_pic_index = 0xff,
-
+.collocated_ref_pic_index = sps->sps_temporal_mvp_enabled_flag ?
+0 : 0xff,
 .last_picture = 0,
 
 .pic_init_qp= pps->init_qp_minus26 + 26,
@@ -674,6 +730,8 @@ static int 
vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 .entropy_coding_sync_enabled_flag = 
pps->entropy_coding_sync_enabled_flag,
 .loop_filter_across_tiles_enabled_flag =
 pps->loop_filter_across_tiles_enabled_flag,
+.pps_loop_filter_across_slices_enabled_flag =
+pps->pps_loop_filter_across_slices_enabled_flag,
 .scaling_list_data_present_flag = 
(sps->sps_scaling_list_data_present_flag |