Re: [Intel-gfx] [PATCH v3 4/4] drm/i915/icl: Add Multi-segmented gamma support

2019-05-08 Thread Sharma, Shashank
We (Me and Uma) confirmed the ICL register programming sequence, by 
dumping the registers.


The correct sequence should be:

ilk_lut_12p4_udw

ilk_lut_12p4_ldw

We passed maximum value LUT (1.0) and saw only blue output, if 
programmed in opposite sequence.


Programming in above mentioned sequence gives a proper white output.

Regards
Shashank
On 5/8/2019 6:35 PM, Sharma, Shashank wrote:

On 5/7/2019 7:57 PM, Ville Syrjälä wrote:

On Tue, May 07, 2019 at 07:26:44PM +0530, Shashank Sharma wrote:

ICL introduces a new gamma correction mode in display engine, called
multi-segmented-gamma mode. This mode allows users to program the
darker region of the gamma curve with sueprfine precision. An
example use case for this is HDR curves (like PQ ST-2084).

If we plot a gamma correction curve from value range between 0.0 to 
1.0,

ICL's multi-segment has 3 different sections:
- superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
- fine segment: 257 values, ranges between 0 - 1/(128)
- corase segment: 257 values, ranges between 0 - 1

This patch:
- Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 
256),

   so that userspace can program with highest precision supported.
- Changes default gamma mode (non-legacy) to multi-segmented-gamma 
mode.

- Adds functions to program/detect multi-segment gamma.

V2: Addressed review comments from Ville
 - separate function for superfine and fine segments.
 - remove enum for segments.
 - reuse last entry of the LUT as gc_max value.
 - replace if() cond with switch...case in icl_load_luts.
 - add an entry variable, instead of 'word'

V3: Addressed review comments from Ville
 - extra newline
 - s/entry/color/
 - remove LUT size checks
 - program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
 - Change the comments in description of fine and coarse segments,
   and try to make more sense.
 - use 8 * 128 instead of 1024
 - add 1 entry in LUT for GCMAX

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Daniel Vetter 

Suggested-by: Ville Syrjälä 
Signed-off-by: Shashank Sharma 
Signed-off-by: Uma Shankar 
---
  drivers/gpu/drm/i915/i915_pci.c    |   2 +-
  drivers/gpu/drm/i915/intel_color.c | 127 
-

  2 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c 
b/drivers/gpu/drm/i915/i915_pci.c

index ffa2ee70a03d..2f99b585d44b 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -749,7 +749,7 @@ static const struct intel_device_info 
intel_cannonlake_info = {

  GEN(11), \
  .ddb_size = 2048, \
  .has_logical_ring_elsq = 1, \
-    .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
+    .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
    static const struct intel_device_info intel_icelake_11_info = {
  GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_color.c 
b/drivers/gpu/drm/i915/intel_color.c

index 6c341bea514c..c1a9506fd069 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -41,6 +41,8 @@
  #define CTM_COEFF_ABS(coeff)    ((coeff) & (CTM_COEFF_SIGN - 1))
    #define LEGACY_LUT_LENGTH    256
+#define ICL_GAMMA_MULTISEG_LUT_LENGTH    (256 * 128 * 8)

Unused.

Got it

+
  /*
   * Extract the CSC coefficient from a CTM coefficient (in U32.32 
fixed point
   * format). This macro takes the coefficient we want transformed 
and the
@@ -767,6 +769,116 @@ static void glk_load_luts(const struct 
intel_crtc_state *crtc_state)

  }
  }
  +/* ilk+ "12.4" interpolated format (high 10 bits) */
+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
+{
+    return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
+    (color->blue >> 6);
+}
+
+/* ilk+ "12.4" interpolated format (low 6 bits) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+    return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
+    (color->blue & 0x3f);

Blue is missing the shift.

Ok,

I'm not 100% sure if the ldw vs. udw are the right way around. The spec
has at times been inconsistent with the odd vs. even descriptions,
sometimes even contradicting itself. Also it never really defines
whether it starts counting dwords from from 0 or 1, so not sure what
odd and even actually mean. Can I presume someone has checked this
on actual hardware?
Well, the property was getting set properly, and the display looked 
ok, but dint dump the values in registers. Can check it now.

+}
+
+static void
+icl_load_gcmax(const struct intel_crtc_state *crtc_state,
+   const struct drm_color_lut *color)
+{
+    struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+    struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+    enum pipe pipe = crtc->pipe;
+
+    /* Fixme: LUT entries are 16 bit only, so we can prog 0x 
max */

+    I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
+    

Re: [Intel-gfx] [PATCH v3 4/4] drm/i915/icl: Add Multi-segmented gamma support

2019-05-08 Thread Sharma, Shashank

On 5/7/2019 7:57 PM, Ville Syrjälä wrote:

On Tue, May 07, 2019 at 07:26:44PM +0530, Shashank Sharma wrote:

ICL introduces a new gamma correction mode in display engine, called
multi-segmented-gamma mode. This mode allows users to program the
darker region of the gamma curve with sueprfine precision. An
example use case for this is HDR curves (like PQ ST-2084).

If we plot a gamma correction curve from value range between 0.0 to 1.0,
ICL's multi-segment has 3 different sections:
- superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
- fine segment: 257 values, ranges between 0 - 1/(128)
- corase segment: 257 values, ranges between 0 - 1

This patch:
- Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
   so that userspace can program with highest precision supported.
- Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
- Adds functions to program/detect multi-segment gamma.

V2: Addressed review comments from Ville
 - separate function for superfine and fine segments.
 - remove enum for segments.
 - reuse last entry of the LUT as gc_max value.
 - replace if() cond with switch...case in icl_load_luts.
 - add an entry variable, instead of 'word'

V3: Addressed review comments from Ville
 - extra newline
 - s/entry/color/
 - remove LUT size checks
 - program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
 - Change the comments in description of fine and coarse segments,
   and try to make more sense.
 - use 8 * 128 instead of 1024
 - add 1 entry in LUT for GCMAX

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Daniel Vetter 

Suggested-by: Ville Syrjälä 
Signed-off-by: Shashank Sharma 
Signed-off-by: Uma Shankar 
---
  drivers/gpu/drm/i915/i915_pci.c|   2 +-
  drivers/gpu/drm/i915/intel_color.c | 127 -
  2 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ffa2ee70a03d..2f99b585d44b 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -749,7 +749,7 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN(11), \
.ddb_size = 2048, \
.has_logical_ring_elsq = 1, \
-   .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
+   .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
  
  static const struct intel_device_info intel_icelake_11_info = {

GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_color.c 
b/drivers/gpu/drm/i915/intel_color.c
index 6c341bea514c..c1a9506fd069 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -41,6 +41,8 @@
  #define CTM_COEFF_ABS(coeff)  ((coeff) & (CTM_COEFF_SIGN - 1))
  
  #define LEGACY_LUT_LENGTH		256

+#define ICL_GAMMA_MULTISEG_LUT_LENGTH  (256 * 128 * 8)

Unused.

Got it

+
  /*
   * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
   * format). This macro takes the coefficient we want transformed and the
@@ -767,6 +769,116 @@ static void glk_load_luts(const struct intel_crtc_state 
*crtc_state)
}
  }
  
+/* ilk+ "12.4" interpolated format (high 10 bits) */

+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
+{
+   return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
+   (color->blue >> 6);
+}
+
+/* ilk+ "12.4" interpolated format (low 6 bits) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+   return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
+   (color->blue & 0x3f);

Blue is missing the shift.

Ok,

I'm not 100% sure if the ldw vs. udw are the right way around. The spec
has at times been inconsistent with the odd vs. even descriptions,
sometimes even contradicting itself. Also it never really defines
whether it starts counting dwords from from 0 or 1, so not sure what
odd and even actually mean. Can I presume someone has checked this
on actual hardware?
Well, the property was getting set properly, and the display looked ok, 
but dint dump the values in registers. Can check it now.

+}
+
+static void
+icl_load_gcmax(const struct intel_crtc_state *crtc_state,
+  const struct drm_color_lut *color)
+{
+   struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+   enum pipe pipe = crtc->pipe;
+
+   /* Fixme: LUT entries are 16 bit only, so we can prog 0x max */
+   I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
+   I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
+   I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
+}
+
+static void
+icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
+{
+   struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+   struct drm_i915_private *dev_priv = 

Re: [Intel-gfx] [PATCH v3 4/4] drm/i915/icl: Add Multi-segmented gamma support

2019-05-07 Thread Ville Syrjälä
On Tue, May 07, 2019 at 07:26:44PM +0530, Shashank Sharma wrote:
> ICL introduces a new gamma correction mode in display engine, called
> multi-segmented-gamma mode. This mode allows users to program the
> darker region of the gamma curve with sueprfine precision. An
> example use case for this is HDR curves (like PQ ST-2084).
> 
> If we plot a gamma correction curve from value range between 0.0 to 1.0,
> ICL's multi-segment has 3 different sections:
> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
> - fine segment: 257 values, ranges between 0 - 1/(128)
> - corase segment: 257 values, ranges between 0 - 1
> 
> This patch:
> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>   so that userspace can program with highest precision supported.
> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
> - Adds functions to program/detect multi-segment gamma.
> 
> V2: Addressed review comments from Ville
> - separate function for superfine and fine segments.
> - remove enum for segments.
> - reuse last entry of the LUT as gc_max value.
> - replace if() cond with switch...case in icl_load_luts.
> - add an entry variable, instead of 'word'
> 
> V3: Addressed review comments from Ville
> - extra newline
> - s/entry/color/
> - remove LUT size checks
> - program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
> - Change the comments in description of fine and coarse segments,
>   and try to make more sense.
> - use 8 * 128 instead of 1024
> - add 1 entry in LUT for GCMAX
> 
> Cc: Ville Syrjälä 
> Cc: Maarten Lankhorst 
> Cc: Daniel Vetter 
> 
> Suggested-by: Ville Syrjälä 
> Signed-off-by: Shashank Sharma 
> Signed-off-by: Uma Shankar 
> ---
>  drivers/gpu/drm/i915/i915_pci.c|   2 +-
>  drivers/gpu/drm/i915/intel_color.c | 127 -
>  2 files changed, 124 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ffa2ee70a03d..2f99b585d44b 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -749,7 +749,7 @@ static const struct intel_device_info 
> intel_cannonlake_info = {
>   GEN(11), \
>   .ddb_size = 2048, \
>   .has_logical_ring_elsq = 1, \
> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>   GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_color.c 
> b/drivers/gpu/drm/i915/intel_color.c
> index 6c341bea514c..c1a9506fd069 100644
> --- a/drivers/gpu/drm/i915/intel_color.c
> +++ b/drivers/gpu/drm/i915/intel_color.c
> @@ -41,6 +41,8 @@
>  #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1))
>  
>  #define LEGACY_LUT_LENGTH256
> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH(256 * 128 * 8)

Unused.

> +
>  /*
>   * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>   * format). This macro takes the coefficient we want transformed and the
> @@ -767,6 +769,116 @@ static void glk_load_luts(const struct intel_crtc_state 
> *crtc_state)
>   }
>  }
>  
> +/* ilk+ "12.4" interpolated format (high 10 bits) */
> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
> +{
> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
> + (color->blue >> 6);
> +}
> +
> +/* ilk+ "12.4" interpolated format (low 6 bits) */
> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
> +{
> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
> + (color->blue & 0x3f);

Blue is missing the shift.

I'm not 100% sure if the ldw vs. udw are the right way around. The spec
has at times been inconsistent with the odd vs. even descriptions,
sometimes even contradicting itself. Also it never really defines
whether it starts counting dwords from from 0 or 1, so not sure what
odd and even actually mean. Can I presume someone has checked this
on actual hardware?

> +}
> +
> +static void
> +icl_load_gcmax(const struct intel_crtc_state *crtc_state,
> +const struct drm_color_lut *color)
> +{
> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> + enum pipe pipe = crtc->pipe;
> +
> + /* Fixme: LUT entries are 16 bit only, so we can prog 0x max */
> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
> +}
> +
> +static void
> +icl_program_gamma_superfine_segment(const struct intel_crtc_state 
> *crtc_state)
> +{
> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> + const 

[Intel-gfx] [PATCH v3 4/4] drm/i915/icl: Add Multi-segmented gamma support

2019-05-07 Thread Shashank Sharma
ICL introduces a new gamma correction mode in display engine, called
multi-segmented-gamma mode. This mode allows users to program the
darker region of the gamma curve with sueprfine precision. An
example use case for this is HDR curves (like PQ ST-2084).

If we plot a gamma correction curve from value range between 0.0 to 1.0,
ICL's multi-segment has 3 different sections:
- superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
- fine segment: 257 values, ranges between 0 - 1/(128)
- corase segment: 257 values, ranges between 0 - 1

This patch:
- Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
  so that userspace can program with highest precision supported.
- Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
- Adds functions to program/detect multi-segment gamma.

V2: Addressed review comments from Ville
- separate function for superfine and fine segments.
- remove enum for segments.
- reuse last entry of the LUT as gc_max value.
- replace if() cond with switch...case in icl_load_luts.
- add an entry variable, instead of 'word'

V3: Addressed review comments from Ville
- extra newline
- s/entry/color/
- remove LUT size checks
- program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
- Change the comments in description of fine and coarse segments,
  and try to make more sense.
- use 8 * 128 instead of 1024
- add 1 entry in LUT for GCMAX

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Daniel Vetter 

Suggested-by: Ville Syrjälä 
Signed-off-by: Shashank Sharma 
Signed-off-by: Uma Shankar 
---
 drivers/gpu/drm/i915/i915_pci.c|   2 +-
 drivers/gpu/drm/i915/intel_color.c | 127 -
 2 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ffa2ee70a03d..2f99b585d44b 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -749,7 +749,7 @@ static const struct intel_device_info intel_cannonlake_info 
= {
GEN(11), \
.ddb_size = 2048, \
.has_logical_ring_elsq = 1, \
-   .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
+   .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
 
 static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_color.c 
b/drivers/gpu/drm/i915/intel_color.c
index 6c341bea514c..c1a9506fd069 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -41,6 +41,8 @@
 #define CTM_COEFF_ABS(coeff)   ((coeff) & (CTM_COEFF_SIGN - 1))
 
 #define LEGACY_LUT_LENGTH  256
+#define ICL_GAMMA_MULTISEG_LUT_LENGTH  (256 * 128 * 8)
+
 /*
  * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
  * format). This macro takes the coefficient we want transformed and the
@@ -767,6 +769,116 @@ static void glk_load_luts(const struct intel_crtc_state 
*crtc_state)
}
 }
 
+/* ilk+ "12.4" interpolated format (high 10 bits) */
+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
+{
+   return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
+   (color->blue >> 6);
+}
+
+/* ilk+ "12.4" interpolated format (low 6 bits) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+   return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
+   (color->blue & 0x3f);
+}
+
+static void
+icl_load_gcmax(const struct intel_crtc_state *crtc_state,
+  const struct drm_color_lut *color)
+{
+   struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+   enum pipe pipe = crtc->pipe;
+
+   /* Fixme: LUT entries are 16 bit only, so we can prog 0x max */
+   I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
+   I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
+   I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
+}
+
+static void
+icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
+{
+   struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+   const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+   const struct drm_color_lut *lut = blob->data;
+   enum pipe pipe = crtc->pipe;
+   u32 i;
+
+   /*
+* Every entry in the multi-segment LUT is corresponding to a superfine
+* segment step which is 1/(8 * 128 * 256).
+*
+* Superfine segment has 9 entries, corresponding to values
+* 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256)  8/(8 * 128 * 256).
+*/
+   I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+
+   for (i = 0; i < 9; i++) {
+   const struct drm_color_lut *entry = [i];
+
+