cm3_helper_translate_curve_to_degamma_hw_format() reads one tf_pts entry
per HW LUT point, limiting the number of samples per region to
NUMBER_SW_SEGMENTS (16, at seg_distr[k] = 4): for higher seg_distr[k] the
integer division truncates the increment to 0. However, the next patch
introduces a halving distribution for PQ/sRGB EOTFs that requires up to
128 samples in its upper region (seg_distr[k] = 7).

As preparation, extend the loop index by 4 fractional bits and linearly
interpolate adjacent tf_pts entries with the new interp_tf_pts() helper,
where the 4 least significant bits are the interpolation weight in 1/16
increments. This raises the cap to 256 samples per region
(seg_distr[k] = 8). The values produced by seg_distr[k] <= 4 paths remain
unchanged: the 4 least significant bits stay zero and interp_tf_pts()
reduces to a direct lookup. Note that the bounds check now also requires
tf_pts index + 1 to be below TRANSFER_FUNC_POINTS, since interp_tf_pts()
always reads the next entry.

Co-developed-by: Harry Wentland <[email protected]>
Signed-off-by: Harry Wentland <[email protected]>
Signed-off-by: Melissa Wen <[email protected]>
---
 .../amd/display/dc/dcn30/dcn30_cm_common.c    | 32 +++++++++++++++----
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
index 0949b1dffc63..70b7bc3494a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
@@ -305,6 +305,22 @@ bool cm3_helper_translate_curve_to_hw_format(struct dc_context *ctx,
 
 #define NUM_DEGAMMA_REGIONS    12
 
+/* Linear interpolation between tf_pts entries (i >> 4) and (i >> 4) + 1, with
+ * weight (i & 0xf) / 16. Entry (i >> 4) + 1 is read even when the weight is 0.
+ */
+static struct fixed31_32 interp_tf_pts(const struct fixed31_32 
*output_tf_channel, int i)
+{
+       struct fixed31_32 in_plus_one, in, value;
+       uint32_t t = i & 0xf; /* weight of the next entry, in 1/16 units */
+
+       in_plus_one = output_tf_channel[(i >> 4) + 1];
+       in = output_tf_channel[i >> 4];
+       value = dc_fixpt_sub(in_plus_one, in); /* delta between the two entries */
+       value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); /* delta * t / 16 */
+       value = dc_fixpt_add(in, value);
+
+       return value;
+}
 
 bool cm3_helper_translate_curve_to_degamma_hw_format(
                                const struct dc_transfer_func *output_tf,
@@ -348,18 +364,20 @@ bool cm3_helper_translate_curve_to_degamma_hw_format(
 
        j = 0;
        for (k = 0; k < (region_end - region_start); k++) {
-               increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
+               increment = (NUMBER_SW_SEGMENTS << 4) / (1 << seg_distr[k]);
                start_index = (region_start + k + MAX_LOW_POINT) *
                                NUMBER_SW_SEGMENTS;
-               for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
-                               i += increment) {
+               for (i = (start_index << 4);
+                    i < (start_index << 4) + (NUMBER_SW_SEGMENTS << 4);
+                    i += increment) {
                        if (j == hw_points - 1)
                                break;
-                       if (i >= TRANSFER_FUNC_POINTS)
+                       if ((i >> 4) + 1 >= TRANSFER_FUNC_POINTS)
                                return false;
-                       rgb_resulted[j].red = output_tf->tf_pts.red[i];
-                       rgb_resulted[j].green = output_tf->tf_pts.green[i];
-                       rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
+
+                       rgb_resulted[j].red = 
interp_tf_pts(output_tf->tf_pts.red, i);
+                       rgb_resulted[j].green = 
interp_tf_pts(output_tf->tf_pts.green, i);
+                       rgb_resulted[j].blue = 
interp_tf_pts(output_tf->tf_pts.blue, i);
                        j++;
                }
        }
-- 
2.53.0

Reply via email to