On Wed, Nov 16, 2016 at 6:06 PM, Zhao Yakui <yakui.z...@intel.com> wrote: > This will help to make good use of HW EU resources. > If it is not supported, it will fall back to the original config. > > > Signed-off-by: Zhao Yakui <yakui.z...@intel.com> > --- > src/gen8_mfc.c | 6 +++++- > src/gen8_post_processing.c | 5 ++++- > src/gen8_vme.c | 7 ++++++- > src/gen9_post_processing.c | 12 ++++++++---- > src/gen9_vme.c | 8 +++++++- > src/gen9_vp9_encoder.c | 23 +++++++++++++++-------- > 6 files changed, 45 insertions(+), 16 deletions(-) > > diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c > index 63ffea5..634a500 100644 > --- a/src/gen8_mfc.c > +++ b/src/gen8_mfc.c > @@ -4612,7 +4612,11 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, > struct intel_encoder_context *e > mfc_context->gpe_context.curbe_size = 32 * 4; > mfc_context->gpe_context.sampler_size = 0; > > - mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; > + if (i965->intel.has_eu_flag) > + mfc_context->gpe_context.vfe_state.max_num_threads = 6 * > i965->intel.eu_total; > + else > + mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; > + > mfc_context->gpe_context.vfe_state.num_urb_entries = 16; > mfc_context->gpe_context.vfe_state.gpgpu_mode = 0; > mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; > diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c > index 708918b..cabf06e 100644 > --- a/src/gen8_post_processing.c > +++ b/src/gen8_post_processing.c > @@ -1592,7 +1592,10 @@ > gen8_post_processing_context_common_init(VADriverContextP ctx, > struct pp_module *pp_module; > struct i965_post_processing_context *pp_context = data; > > - pp_context->vfe_gpu_state.max_num_threads = 60; > + if (i965->intel.has_eu_flag) > + pp_context->vfe_gpu_state.max_num_threads = 6 * i965->intel.eu_total; > + else > + pp_context->vfe_gpu_state.max_num_threads = 60; > pp_context->vfe_gpu_state.num_urb_entries = 59; > pp_context->vfe_gpu_state.gpgpu_mode = 0; > 
pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; > diff --git a/src/gen8_vme.c b/src/gen8_vme.c > index c79c62b..fd16ac8 100644 > --- a/src/gen8_vme.c > +++ b/src/gen8_vme.c > @@ -1333,6 +1333,7 @@ gen8_vme_context_destroy(void *context) > > Bool gen8_vme_context_init(VADriverContextP ctx, struct > intel_encoder_context *encoder_context) > { > + struct i965_driver_data *i965 = i965_driver_data(ctx); > struct gen6_vme_context *vme_context = NULL; > struct i965_kernel *vme_kernel_list = NULL; > int i965_kernel_num; > @@ -1382,8 +1383,12 @@ Bool gen8_vme_context_init(VADriverContextP ctx, > struct intel_encoder_context *e > vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH; > vme_context->gpe_context.sampler_size = 0; > > + if (i965->intel.has_eu_flag) { > + vme_context->gpe_context.vfe_state.max_num_threads = 6 * > + i965->intel.eu_total; > + } else > + vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; > > - vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; > vme_context->gpe_context.vfe_state.num_urb_entries = 64; > vme_context->gpe_context.vfe_state.gpgpu_mode = 0; > vme_context->gpe_context.vfe_state.urb_entry_size = 16; > diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c > index a5d345c..3ea0908 100644 > --- a/src/gen9_post_processing.c > +++ b/src/gen9_post_processing.c > @@ -546,10 +546,14 @@ gen9_post_processing_context_init(VADriverContextP ctx, > gpe_context->surface_state_binding_table.surface_state_offset = > ALIGN(MAX_SCALING_SURFACES * 4, 64); > gpe_context->surface_state_binding_table.length = > ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * > SURFACE_STATE_PADDED_SIZE_GEN9, 64); > > - if (i965->intel.has_bsd2) > - gpe_context->vfe_state.max_num_threads = 300; > - else > - gpe_context->vfe_state.max_num_threads = 60; > + if (i965->intel.has_eu_flag) { > + gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6; > + } else { > + if (i965->intel.has_bsd2) > + 
gpe_context->vfe_state.max_num_threads = 300; > + else > + gpe_context->vfe_state.max_num_threads = 60; > + } > > gpe_context->vfe_state.curbe_allocation_size = 37; > gpe_context->vfe_state.urb_entry_size = 16; > diff --git a/src/gen9_vme.c b/src/gen9_vme.c > index 6ad8fff..bbaec3c 100644 > --- a/src/gen9_vme.c > +++ b/src/gen9_vme.c > @@ -1978,6 +1978,7 @@ gen9_vme_context_destroy(void *context) > > Bool gen9_vme_context_init(VADriverContextP ctx, struct > intel_encoder_context *encoder_context) > { > + struct i965_driver_data *i965 = i965_driver_data(ctx); > struct gen6_vme_context *vme_context; > struct i965_kernel *vme_kernel_list = NULL; > int i965_kernel_num; > @@ -2036,7 +2037,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, > struct intel_encoder_context *e > vme_context->gpe_context.sampler_size = 0; > > > - vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; > + if (i965->intel.has_eu_flag) { > + vme_context->gpe_context.vfe_state.max_num_threads = 6 * > + i965->intel.eu_total; > + } else > + vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; > + > vme_context->gpe_context.vfe_state.num_urb_entries = 64; > vme_context->gpe_context.vfe_state.gpgpu_mode = 0; > vme_context->gpe_context.vfe_state.urb_entry_size = 16; > diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c > index f39d6d0..0a54a36 100644 > --- a/src/gen9_vp9_encoder.c > +++ b/src/gen9_vp9_encoder.c > @@ -3679,9 +3679,12 @@ gen9_vp9_mbenc_kernel(VADriverContextP ctx, > } > > static void > -gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context, > +gen9_init_gpe_context_vp9(VADriverContextP ctx, > + struct i965_gpe_context *gpe_context, > struct vp9_encoder_kernel_parameter *kernel_param) > { > + struct i965_driver_data *i965 = i965_driver_data(ctx); > + > gpe_context->curbe.length = kernel_param->curbe_size; // in bytes > > gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64); > @@ -3701,7 +3704,11 @@ gen9_init_gpe_context_vp9(struct 
i965_gpe_context > *gpe_context, > gpe_context->surface_state_binding_table.surface_state_offset = > ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64); > gpe_context->surface_state_binding_table.length = > ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * > SURFACE_STATE_PADDED_SIZE_GEN9, 64); > > - gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads > + if (i965->intel.has_eu_flag) > + gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total; > + else > + gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads > + > gpe_context->vfe_state.curbe_allocation_size = MAX(1, > ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers > gpe_context->vfe_state.urb_entry_size = MAX(1, > ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers > gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE - > @@ -4607,7 +4614,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx, > scoreboard_param.walkpat_flag = 0; > > gpe_context = &scaling_context->gpe_contexts[0]; > - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); > + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); > gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); > > scaling_context->scaling_4x_bti.scaling_frame_src_y = > VP9_BTI_SCALING_FRAME_SRC_Y; > @@ -4633,7 +4640,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx, > kernel_param.sampler_size = 0; > > gpe_context = &scaling_context->gpe_contexts[1]; > - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); > + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); > gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); > > memset(&scale_kernel, 0, sizeof(scale_kernel)); > @@ -4675,7 +4682,7 @@ gen9_vme_me_context_init_vp9(VADriverContextP ctx, > scoreboard_param.walkpat_flag = 0; > > gpe_context = &me_context->gpe_context; > - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); > + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); > 
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); > > memset(&scale_kernel, 0, sizeof(scale_kernel)); > @@ -4723,7 +4730,7 @@ gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx, > } else > scoreboard_param.walkpat_flag = 0; > > - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); > + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); > gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); > > memset(&scale_kernel, 0, sizeof(scale_kernel)); > @@ -4763,7 +4770,7 @@ gen9_vme_brc_context_init_vp9(VADriverContextP ctx, > > for (i = 0; i < NUM_VP9_BRC; i++) { > gpe_context = &brc_context->gpe_contexts[i]; > - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); > + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); > gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); > > memset(&scale_kernel, 0, sizeof(scale_kernel)); > @@ -4802,7 +4809,7 @@ gen9_vme_dys_context_init_vp9(VADriverContextP ctx, > scoreboard_param.walkpat_flag = 0; > > gpe_context = &dys_context->gpe_context; > - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); > + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); > gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); > > memset(&scale_kernel, 0, sizeof(scale_kernel));
This patch LGTM, and I have also verified it on hardware that supports the VP9 encoder. Thanks, -- Daniel > -- > 2.8.3 > > _______________________________________________ > Libva mailing list > Libva@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/libva _______________________________________________ Libva mailing list Libva@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libva