cm3_helper_translate_curve_to_degamma_hw_format() reads one tf_pts entry per HW LUT point, which limits the number of samples per region to NUMBER_SW_SEGMENTS (16, reached at seg_distr[k] = 4): for any higher seg_distr[k], the integer division NUMBER_SW_SEGMENTS / (1 << seg_distr[k]) truncates the increment to 0. However, the next patch introduces a halving distribution for PQ/sRGB EOTFs that requires up to 128 samples in its upper region (seg_distr[k] = 7).
As preparation, extend the loop index by 4 bits and linearly interpolate adjacent tf_pts entries with the new interp_tf_pts() helper, where the 4 least significant bits of the index are the interpolation weight in 1/16 increments. This raises the cap to 256 samples per region (seg_distr[k] = 8). The paths with seg_distr[k] <= 4 remain unchanged: there the 4 least significant bits are always zero and interp_tf_pts() reduces to a direct lookup. Co-developed-by: Harry Wentland <[email protected]> Signed-off-by: Harry Wentland <[email protected]> Signed-off-by: Melissa Wen <[email protected]> --- .../amd/display/dc/dcn30/dcn30_cm_common.c | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index 0949b1dffc63..70b7bc3494a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -305,6 +305,22 @@ bool cm3_helper_translate_curve_to_hw_format(struct dc_context *ctx, #define NUM_DEGAMMA_REGIONS 12 +/* Linear interpolation of tf_pts entries, where (i >> 4) is the integer tf_pts + * index, (i & 0xf) is the 1/16 sub-position. 
+ */ +static struct fixed31_32 interp_tf_pts(const struct fixed31_32 *output_tf_channel, int i) +{ + struct fixed31_32 in_plus_one, in, value; + uint32_t t = i & 0xf; + + in_plus_one = output_tf_channel[(i >> 4) + 1]; + in = output_tf_channel[i >> 4]; + value = dc_fixpt_sub(in_plus_one, in); + value = dc_fixpt_shr(dc_fixpt_mul_int(value, t), 4); + value = dc_fixpt_add(in, value); + + return value; +} bool cm3_helper_translate_curve_to_degamma_hw_format( const struct dc_transfer_func *output_tf, @@ -348,18 +364,20 @@ bool cm3_helper_translate_curve_to_degamma_hw_format( j = 0; for (k = 0; k < (region_end - region_start); k++) { - increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); + increment = (NUMBER_SW_SEGMENTS << 4) / (1 << seg_distr[k]); start_index = (region_start + k + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; - for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; - i += increment) { + for (i = (start_index << 4); + i < (start_index << 4) + (NUMBER_SW_SEGMENTS << 4); + i += increment) { if (j == hw_points - 1) break; - if (i >= TRANSFER_FUNC_POINTS) + if ((i >> 4) + 1 >= TRANSFER_FUNC_POINTS) return false; - rgb_resulted[j].red = output_tf->tf_pts.red[i]; - rgb_resulted[j].green = output_tf->tf_pts.green[i]; - rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; + + rgb_resulted[j].red = interp_tf_pts(output_tf->tf_pts.red, i); + rgb_resulted[j].green = interp_tf_pts(output_tf->tf_pts.green, i); + rgb_resulted[j].blue = interp_tf_pts(output_tf->tf_pts.blue, i); j++; } } -- 2.53.0
