Re: [Libva] [PATCH V3][libva-intel-driver] Clear IECP state buffer when it is enabled

2016-09-18 Thread Xiang, Haihao

Applied. 

Thanks
Haihao

> It fixes a VEBOX GPU hang issue while doing P010->NV12 CSC
> 
> v3: Remove the file mode changes
> 
> v2: Add VPP_IECP_CSC_TRANSFORM flag for the actual transform
> (YUV<->RGB) operation.
> It removes the conflicting meanings of proc_ctx->is_iecp_enabled and
> proc_ctx->filters_mask when no actual transform is required, such as
> P010->NV12
> 
> Signed-off-by: peng.chen 
> ---
>  src/gen75_vpp_vebox.c | 38 +-
>  src/gen75_vpp_vebox.h |  1 +
>  2 files changed, 26 insertions(+), 13 deletions(-)
> 
> diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c
> index 91715fc..d88a454 100644
> --- a/src/gen75_vpp_vebox.c
> +++ b/src/gen75_vpp_vebox.c
> @@ -794,7 +794,7 @@ void hsw_veb_iecp_pro_amp_table(VADriverContextP
> ctx, struct intel_vebox_context
>  }
>  
>  
> -void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct
> intel_vebox_context *proc_ctx)
> +void hsw_veb_iecp_csc_transform_table(VADriverContextP ctx, struct
> intel_vebox_context *proc_ctx)
>  {
>  unsigned int *p_table = (unsigned int*)(proc_ctx-
> >iecp_state_table.ptr + 220);
>  float tran_coef[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0,
> 1.0};
> @@ -802,7 +802,7 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx,
> struct intel_vebox_context *pr
>  float u_coef[3]= {0.0, 0.0, 0.0};
>  int   is_transform_enabled = 0;
>  
> -if(!(proc_ctx->filters_mask & VPP_IECP_CSC)){
> +if(!(proc_ctx->filters_mask & VPP_IECP_CSC_TRANSFORM)){
>  memset(p_table, 0, 8 * 4);
>  return;
>  }
> @@ -929,7 +929,7 @@ void hsw_veb_state_table_setup(VADriverContextP
> ctx, struct intel_vebox_context
>  hsw_veb_iecp_ace_table(ctx, proc_ctx);
>  hsw_veb_iecp_tcc_table(ctx, proc_ctx);
>  hsw_veb_iecp_pro_amp_table(ctx, proc_ctx);
> -hsw_veb_iecp_csc_table(ctx, proc_ctx);
> +hsw_veb_iecp_csc_transform_table(ctx, proc_ctx);
>  hsw_veb_iecp_aoi_table(ctx, proc_ctx);
> 
>  dri_bo_unmap(iecp_bo);
> @@ -1196,8 +1196,21 @@
> gen75_vebox_ensure_surfaces_storage(VADriverContextP ctx,
>  /* Update VEBOX pipeline formats */
>  proc_ctx->fourcc_input = input_fourcc;
>  proc_ctx->fourcc_output = output_fourcc;
> -if (input_fourcc != output_fourcc)
> -proc_ctx->is_iecp_enabled = 1; // IECP needed for format
> conversion
> +if (input_fourcc != output_fourcc) {
> +proc_ctx->filters_mask |= VPP_IECP_CSC;
> +
> +if (input_fourcc == VA_FOURCC_RGBA &&
> +(output_fourcc == VA_FOURCC_NV12 ||
> + output_fourcc == VA_FOURCC_P010)) {
> +proc_ctx->filters_mask |= VPP_IECP_CSC_TRANSFORM;
> +} else if (output_fourcc == VA_FOURCC_RGBA &&
> +   (input_fourcc == VA_FOURCC_NV12 ||
> +input_fourcc == VA_FOURCC_P010)) {
> +proc_ctx->filters_mask |= VPP_IECP_CSC_TRANSFORM;
> +}
> +}
> +
> +proc_ctx->is_iecp_enabled = (proc_ctx->filters_mask &
> VPP_IECP_MASK) != 0;
> 
>  /* Create pipeline surfaces */
>  for (i = 0; i < ARRAY_ELEMS(proc_ctx->frame_store); i ++) {
> @@ -1602,6 +1615,10 @@ gen75_vebox_init_pipe_params(VADriverContextP
> ctx,
>  return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
>  }
>  }
> +
> +if(proc_ctx->filters_mask == 0)
> +proc_ctx->filters_mask |= VPP_IECP_CSC;
> +
>  return VA_STATUS_SUCCESS;
>  }
>  
> @@ -1618,11 +1635,6 @@
> gen75_vebox_init_filter_params(VADriverContextP ctx,
>  proc_ctx->is_first_frame = 0;
>  proc_ctx->is_second_field = 0;
>  
> -if(!proc_ctx->is_di_enabled && !proc_ctx->is_dn_enabled) {
> -// MUST enable IECP if all DI&DN are disabled
> -proc_ctx->is_iecp_enabled = 1;
> -}
> -
>  /* Check whether we are deinterlacing the second field */
>  if (proc_ctx->is_di_enabled) {
>  const VAProcFilterParameterBufferDeinterlacing * const
> deint_params =
> @@ -2090,7 +2102,7 @@ skl_veb_dndi_table(VADriverContextP ctx, struct
> intel_vebox_context *proc_ctx)
>  5 );  // sad tha
>  }
>  
> -void skl_veb_iecp_csc_table(VADriverContextP ctx, struct
> intel_vebox_context *proc_ctx)
> +void skl_veb_iecp_csc_transform_table(VADriverContextP ctx, struct
> intel_vebox_context *proc_ctx)
>  {
>  unsigned int *p_table = (unsigned int*)(proc_ctx-
> >iecp_state_table.ptr + 220);
>  float tran_coef[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0,
> 1.0};
> @@ -2098,7 +2110,7 @@ void skl_veb_iecp_csc_table(VADriverContextP
> ctx, struct intel_vebox_context *pr
>  float u_coef[3]= {0.0, 0.0, 0.0};
>  int   is_transform_enabled = 0;
>  
> -if(!(proc_ctx->filters_mask & VPP_IECP_CSC)){
> +if(!(proc_ctx->filters_mask & VPP_IECP_CSC_TRANSFORM)){
>  memset(p_table, 0, 12 * 4);
>  return;
>  }
> @@ -2226,7 +2238,7 @@ void skl_veb_state_table_setup(VADriverContextP
> ctx, struct intel_vebox_context
> 

[Libva] [PATCH V3][libva-intel-driver] Clear IECP state buffer when it is enabled

2016-09-18 Thread peng.chen
It fixes a VEBOX GPU hang issue while doing P010->NV12 CSC

v3: Remove the file mode changes

v2: Add VPP_IECP_CSC_TRANSFORM flag for the actual transform (YUV<->RGB)
operation.
It removes the conflicting meanings of proc_ctx->is_iecp_enabled and
proc_ctx->filters_mask when no actual transform is required, such as P010->NV12

Signed-off-by: peng.chen 
---
 src/gen75_vpp_vebox.c | 38 +-
 src/gen75_vpp_vebox.h |  1 +
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c
index 91715fc..d88a454 100644
--- a/src/gen75_vpp_vebox.c
+++ b/src/gen75_vpp_vebox.c
@@ -794,7 +794,7 @@ void hsw_veb_iecp_pro_amp_table(VADriverContextP ctx, 
struct intel_vebox_context
 }
 
 
-void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context 
*proc_ctx)
+void hsw_veb_iecp_csc_transform_table(VADriverContextP ctx, struct 
intel_vebox_context *proc_ctx)
 {
 unsigned int *p_table = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 
220);
 float tran_coef[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
@@ -802,7 +802,7 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct 
intel_vebox_context *pr
 float u_coef[3]= {0.0, 0.0, 0.0};
 int   is_transform_enabled = 0;
 
-if(!(proc_ctx->filters_mask & VPP_IECP_CSC)){
+if(!(proc_ctx->filters_mask & VPP_IECP_CSC_TRANSFORM)){
 memset(p_table, 0, 8 * 4);
 return;
 }
@@ -929,7 +929,7 @@ void hsw_veb_state_table_setup(VADriverContextP ctx, struct 
intel_vebox_context
 hsw_veb_iecp_ace_table(ctx, proc_ctx);
 hsw_veb_iecp_tcc_table(ctx, proc_ctx);
 hsw_veb_iecp_pro_amp_table(ctx, proc_ctx);
-hsw_veb_iecp_csc_table(ctx, proc_ctx);
+hsw_veb_iecp_csc_transform_table(ctx, proc_ctx);
 hsw_veb_iecp_aoi_table(ctx, proc_ctx);

 dri_bo_unmap(iecp_bo);
@@ -1196,8 +1196,21 @@ gen75_vebox_ensure_surfaces_storage(VADriverContextP ctx,
 /* Update VEBOX pipeline formats */
 proc_ctx->fourcc_input = input_fourcc;
 proc_ctx->fourcc_output = output_fourcc;
-if (input_fourcc != output_fourcc)
-proc_ctx->is_iecp_enabled = 1; // IECP needed for format conversion
+if (input_fourcc != output_fourcc) {
+proc_ctx->filters_mask |= VPP_IECP_CSC;
+
+if (input_fourcc == VA_FOURCC_RGBA &&
+(output_fourcc == VA_FOURCC_NV12 ||
+ output_fourcc == VA_FOURCC_P010)) {
+proc_ctx->filters_mask |= VPP_IECP_CSC_TRANSFORM;
+} else if (output_fourcc == VA_FOURCC_RGBA &&
+   (input_fourcc == VA_FOURCC_NV12 ||
+input_fourcc == VA_FOURCC_P010)) {
+proc_ctx->filters_mask |= VPP_IECP_CSC_TRANSFORM;
+}
+}
+
+proc_ctx->is_iecp_enabled = (proc_ctx->filters_mask & VPP_IECP_MASK) != 0;

 /* Create pipeline surfaces */
 for (i = 0; i < ARRAY_ELEMS(proc_ctx->frame_store); i ++) {
@@ -1602,6 +1615,10 @@ gen75_vebox_init_pipe_params(VADriverContextP ctx,
 return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
 }
 }
+
+if(proc_ctx->filters_mask == 0)
+proc_ctx->filters_mask |= VPP_IECP_CSC;
+
 return VA_STATUS_SUCCESS;
 }
 
@@ -1618,11 +1635,6 @@ gen75_vebox_init_filter_params(VADriverContextP ctx,
 proc_ctx->is_first_frame = 0;
 proc_ctx->is_second_field = 0;
 
-if(!proc_ctx->is_di_enabled && !proc_ctx->is_dn_enabled) {
-// MUST enable IECP if all DI&DN are disabled
-proc_ctx->is_iecp_enabled = 1;
-}
-
 /* Check whether we are deinterlacing the second field */
 if (proc_ctx->is_di_enabled) {
 const VAProcFilterParameterBufferDeinterlacing * const deint_params =
@@ -2090,7 +2102,7 @@ skl_veb_dndi_table(VADriverContextP ctx, struct 
intel_vebox_context *proc_ctx)
 5 );  // sad tha
 }
 
-void skl_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context 
*proc_ctx)
+void skl_veb_iecp_csc_transform_table(VADriverContextP ctx, struct 
intel_vebox_context *proc_ctx)
 {
 unsigned int *p_table = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 
220);
 float tran_coef[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
@@ -2098,7 +2110,7 @@ void skl_veb_iecp_csc_table(VADriverContextP ctx, struct 
intel_vebox_context *pr
 float u_coef[3]= {0.0, 0.0, 0.0};
 int   is_transform_enabled = 0;
 
-if(!(proc_ctx->filters_mask & VPP_IECP_CSC)){
+if(!(proc_ctx->filters_mask & VPP_IECP_CSC_TRANSFORM)){
 memset(p_table, 0, 12 * 4);
 return;
 }
@@ -2226,7 +2238,7 @@ void skl_veb_state_table_setup(VADriverContextP ctx, 
struct intel_vebox_context
 hsw_veb_iecp_ace_table(ctx, proc_ctx);
 hsw_veb_iecp_tcc_table(ctx, proc_ctx);
 hsw_veb_iecp_pro_amp_table(ctx, proc_ctx);
-skl_veb_iecp_csc_table(ctx, proc_ctx);
+skl_veb_iecp_csc_transform_table(ctx, proc_ctx);
 skl_veb_iecp_aoi_table(ctx