icl: Add Multi-segmented gamma support

Ville Syrjälä Fri, 03 May 2019 08:51:18 -0700

On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote:
> ICL introduces a new gamma correction mode in display engine, called
> multi-segmented-gamma mode. This mode allows users to program the
> darker region of the gamma curve with sueprfine precision. An
> example use case for this is HDR curves (like PQ ST-2084).
> 
> If we plot a gamma correction curve from value range between 0.0 to 1.0,
> ICL's multi-segment has 3 different sections:
> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
> - fine segment: 257 values, ranges between 0 - 1/(128)
> - corase segment: 257 values, ranges between 0 - 1
> 
> This patch:
> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>   so that userspace can program with highest precision supported.
> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
> - Adds functions to program/detect multi-segment gamma.
> 
> V2: Addressed review comments from Ville
>     - separate function for superfine and fine segments.
>     - remove enum for segments.
>     - reuse last entry of the LUT as gc_max value.
>     - replace if() ....cond with switch...case in icl_load_luts.
>     - add an entry variable, instead of 'word'
> 
> Cc: Ville Syrjälä <ville.syrj...@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankho...@linux.intel.com>
> Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
> 
> Suggested-by: Ville Syrjälä <ville.syrj...@linux.intel.com>
> Signed-off-by: Shashank Sharma <shashank.sha...@intel.com>
> Signed-off-by: Uma Shankar <uma.shan...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_pci.c    |   3 +-
>  drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++-
>  2 files changed, 123 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ffa2ee70a03d..83698951760b 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -749,7 +749,8 @@ static const struct intel_device_info 
> intel_cannonlake_info = {
>       GEN(11), \
>       .ddb_size = 2048, \
>       .has_logical_ring_elsq = 1, \
> -     .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
> +     .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }


Ugh. Thats one big LUT. But looks correct.

> +

Bogus newline.

>  
>  static const struct intel_device_info intel_icelake_11_info = {
>       GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_color.c 
> b/drivers/gpu/drm/i915/intel_color.c
> index 6c341bea514c..49831e8d02fb 100644
> --- a/drivers/gpu/drm/i915/intel_color.c
> +++ b/drivers/gpu/drm/i915/intel_color.c
> @@ -41,6 +41,9 @@
>  #define CTM_COEFF_ABS(coeff)         ((coeff) & (CTM_COEFF_SIGN - 1))
>  
>  #define LEGACY_LUT_LENGTH            256
> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH                (256 * 128 * 8)
> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH       9
> +
>  /*
>   * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>   * format). This macro takes the coefficient we want transformed and the
> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state 
> *crtc_state)
>       }
>  }
>  
> +/* ilk+ "12.4" interpolated format (high 10 bits) */
> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
> +{
> +     return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
> +             (color->blue >> 6);
> +}
> +
> +/* ilk+ "12.4" interpolated format (low 6 bits) */
> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
> +{
> +     return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
> +             (color->blue & 0x3f);
> +}
> +
> +static void
> +icl_load_gcmax(const struct intel_crtc_state *crtc_state,
> +                     const struct drm_color_lut *entry)

Indentation looks off. Also s/entry/color/ to match the other similarish
funcs maybe?

> +{
> +     struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +     struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +     enum pipe pipe = crtc->pipe;
> +
> +     /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
> +     I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red);
> +     I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green);
> +     I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue);
> +}
> +
> +static void
> +icl_program_gamma_superfine_segment(const struct intel_crtc_state 
> *crtc_state)
> +{
> +     struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +     struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +     const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
> +     const struct drm_color_lut *lut = blob->data;
> +     enum pipe pipe = crtc->pipe;
> +     u32 i;
> +
> +     if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH)
> +             return;

These checks aren't needed. Just dead code.

> +
> +     /*
> +      * Every entry in the multi-segment LUT is corresponding to a superfine
> +      * segment step which is 1/(8 * 128 * 256).
> +      *
> +      * Superfine segment has 9 entries, corresponding to values
> +      * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
> +      */
> +     I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
> +
> +     for (i = 0; i < 9; i++) {
> +             const struct drm_color_lut *entry = &lut[i];
> +
> +             I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
> +                        ilk_lut_12p4_udw(entry));

ldw should come before udw.

> +             I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
> +                        ilk_lut_12p4_ldw(entry));
> +     }
> +}
> +
> +static void
> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
> +{
> +     struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +     struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +     const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
> +     const struct drm_color_lut *lut = blob->data;
> +     const struct drm_color_lut *entry;

'entry' declaration can be moved into the loops.

> +     enum pipe pipe = crtc->pipe;
> +     u32 i;
> +
> +     if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH))
> +             return;

More checks that aren't needed.

> +
> +     /*
> +      * Every entry in the multi-segment LUT is corresponding to a superfine
> +      * segment step which is 1/(8*128*256).
> +      *
> +      * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
> +      * ... 256/(128*256). So in order to program fine segment of LUT we
> +      * need to pick every 8'th entry in LUT, and program 256 indexes.
> +      * Fine segment's index 0 is programmed in HW, and it starts from
> +      * index 1.

The wording here is a bit confusing. I guess the problem is what to call
things. PAL_PREC_INDEX[0/1] is what we program, but that maps to the point
seg2[1] with seg2[0] being unused by the hw. Well, the spec says it's
implicit but IIRC I was told long ago that it's not actually used.

Not sure how to word that in the best way. Maybe something like?

/*
 * Fine segment (seg2) ...
 *
 * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
 * with seg2[0] being unused by the hardware.
 */

Not sure that's any clearer.

> +      */
> +     I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
> +     for (i = 1; i < 257; i++) {
> +             entry = &lut[i * 8];
> +             I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
> +             I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
> +     }
> +
> +     /*
> +      * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
> +      * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
> +      * above, we need to pick every 8 * 128 = 1024th entry in LUT, and
> +      * program 256 of those.
> +      */

Could make a note here stating that seg3[0] and seg3[1] are also unused
by the hardware, even though we have to program them to advance the
index. I don't see it mentioned in the spec, but this one I definitely
remember confirming from Art way back when. However I never verified
that on actual hw. We could also consider just programming those two
entries to 0 and start the actual coarse segment programming from index 2.
Or we could skip them by reprogramming the index directly.

> +     for (i = 0; i < 256; i++) {
> +             entry = &lut[i * 1024];

s/1024/8 * 128/ maybe?

> +             I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
> +             I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
> +     }
> +
> +     icl_load_gcmax(crtc_state, entry);
> +     ivb_load_lut_ext_max(crtc);
> +}
> +
>  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>  {
>       const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> @@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state 
> *crtc_state)
>       if (crtc_state->base.degamma_lut)
>               glk_load_degamma_lut(crtc_state);
>  
> -     if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
> -         GAMMA_MODE_MODE_8BIT) {
> +     switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
> +     case GAMMA_MODE_MODE_8BIT:
>               i9xx_load_luts(crtc_state);
> -     } else {
> +             break;
> +
> +     case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
> +             icl_program_gamma_superfine_segment(crtc_state);
> +             icl_program_gamma_multi_segment(crtc_state);
> +             break;
> +
> +     default:
>               bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
>               ivb_load_lut_ext_max(crtc);
>       }
> @@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state 
> *crtc_state)
>           crtc_state_is_legacy_gamma(crtc_state))
>               gamma_mode |= GAMMA_MODE_MODE_8BIT;
>       else
> -             gamma_mode |= GAMMA_MODE_MODE_10BIT;
> +             gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>  
>       return gamma_mode;
>  }
> -- 
> 2.17.1

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2 4/4] drm/i915/icl: Add Multi-segmented gamma support

Reply via email to