From: Austin Zheng <[email protected]>

Why:
Certain display configs resulted in underflow

How:
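Add an entry containing all max DC mode clock limits (DCFCLK, FCLK and
MEMCLK) without bandwidth matching. Entries that share the same bandwidth
are then sorted by DCFCLK, and entries whose MEMCLK or FCLK are not
monotonically increasing are removed.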

Reviewed-by: Alvin Lee <[email protected]>
Acked-by: Hamza Mahfooz <[email protected]>
Signed-off-by: Austin Zheng <[email protected]>
---
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 90 ++++++++++++++++--
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  4 -
 .../amd/display/dc/dml/dcn321/dcn321_fpu.c    | 92 +++++++++++++++++--
 .../amd/display/dc/dml/dcn321/dcn321_fpu.h    |  4 -
 .../amd/display/dc/dml/display_mode_structs.h |  1 +
 5 files changed, 171 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index e2bb2b9971f3..a95034801712 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -485,24 +485,20 @@ static void get_optimal_ntuple(struct 
_vcs_dpi_voltage_scaling_st *entry)
        }
 }
 
-void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st 
*table,
                                    unsigned int *num_entries,
                                    struct _vcs_dpi_voltage_scaling_st *entry)
 {
        int i = 0;
        int index = 0;
-       float net_bw_of_new_state = 0;
 
        dc_assert_fp_enabled();
 
-       get_optimal_ntuple(entry);
-
        if (*num_entries == 0) {
                table[0] = *entry;
                (*num_entries)++;
        } else {
-               net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
-               while (net_bw_of_new_state > 
calculate_net_bw_in_kbytes_sec(&table[index])) {
+               while (entry->net_bw_in_kbytes_sec > 
table[index].net_bw_in_kbytes_sec) {
                        index++;
                        if (index >= *num_entries)
                                break;
@@ -2349,6 +2345,63 @@ void dcn32_patch_dpm_table(struct clk_bw_params 
*bw_params)
                bw_params->clk_table.entries[0].memclk_mhz = 
dcn3_2_soc.clock_limits[0].dram_speed_mts / 16;
 }
 
+static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry,
+               struct _vcs_dpi_voltage_scaling_st *second_entry)
+{
+       struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry;
+       *first_entry = *second_entry;
+       *second_entry = temp_entry;
+}
+
+/*
+ * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by 
DCFCLK
+ */
+static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st 
*table, unsigned int *num_entries)
+{
+       unsigned int start_index = 0;
+       unsigned int end_index = 0;
+       unsigned int current_bw = 0;
+
+       for (int i = 0; i < (*num_entries - 1); i++) {
+               if (table[i].net_bw_in_kbytes_sec == 
table[i+1].net_bw_in_kbytes_sec) {
+                       current_bw = table[i].net_bw_in_kbytes_sec;
+                       start_index = i;
+                       end_index = ++i;
+
+                       while ((i < (*num_entries - 1)) && 
(table[i+1].net_bw_in_kbytes_sec == current_bw))
+                               end_index = ++i;
+               }
+
+               if (start_index != end_index) {
+                       for (int j = start_index; j < end_index; j++) {
+                               for (int k = start_index; k < end_index; k++) {
+                                       if (table[k].dcfclk_mhz > 
table[k+1].dcfclk_mhz)
+                                               swap_table_entries(&table[k], 
&table[k+1]);
+                               }
+                       }
+               }
+
+               start_index = 0;
+               end_index = 0;
+
+       }
+}
+
+/*
+ * remove_inconsistent_entries - Ensure entries with the same bandwidth have 
MEMCLK and FCLK monotonically increasing
+ *                               and remove entries that do not follow this order
+ */
+static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st 
*table, unsigned int *num_entries)
+{
+       for (int i = 0; i < (*num_entries - 1); i++) {
+               if (table[i].net_bw_in_kbytes_sec == 
table[i+1].net_bw_in_kbytes_sec) {
+                       if ((table[i].dram_speed_mts > 
table[i+1].dram_speed_mts) ||
+                               (table[i].fabricclk_mhz > 
table[i+1].fabricclk_mhz))
+                               remove_entry_from_table_at_index(table, 
num_entries, i);
+               }
+       }
+}
+
 /*
  * override_max_clk_values - Overwrite the max clock frequencies with the max 
DC mode timings
  * Input:
@@ -2480,6 +2533,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                entry.fabricclk_mhz = 0;
                entry.dram_speed_mts = 0;
 
+               get_optimal_ntuple(&entry);
+               entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                insert_entry_into_table_sorted(table, num_entries, &entry);
        }
 
@@ -2488,6 +2543,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
        entry.fabricclk_mhz = 0;
        entry.dram_speed_mts = 0;
 
+       get_optimal_ntuple(&entry);
+       entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
        insert_entry_into_table_sorted(table, num_entries, &entry);
 
        // Insert the UCLK DPMS
@@ -2496,6 +2553,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                entry.fabricclk_mhz = 0;
                entry.dram_speed_mts = 
bw_params->clk_table.entries[i].memclk_mhz * 16;
 
+               get_optimal_ntuple(&entry);
+               entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                insert_entry_into_table_sorted(table, num_entries, &entry);
        }
 
@@ -2506,6 +2565,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                        entry.fabricclk_mhz = 
bw_params->clk_table.entries[i].fclk_mhz;
                        entry.dram_speed_mts = 0;
 
+                       get_optimal_ntuple(&entry);
+                       entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                        insert_entry_into_table_sorted(table, num_entries, 
&entry);
                }
        }
@@ -2515,6 +2576,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                entry.fabricclk_mhz = max_clk_data.fclk_mhz;
                entry.dram_speed_mts = 0;
 
+               get_optimal_ntuple(&entry);
+               entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                insert_entry_into_table_sorted(table, num_entries, &entry);
        }
 
@@ -2530,6 +2593,21 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                        remove_entry_from_table_at_index(table, num_entries, i);
        }
 
+       // Insert entry with all max dc limits without bandwidth matching
+       if (!disable_dc_mode_overwrite) {
+               struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry;
+
+               max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+               max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+               max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 
16;
+
+               max_dc_limits_entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry);
+               insert_entry_into_table_sorted(table, num_entries, 
&max_dc_limits_entry);
+
+               sort_entries_with_same_bw(table, num_entries);
+               remove_inconsistent_entries(table, num_entries);
+       }
+
        // At this point, the table only contains supported points of interest
        // it could be used as is, but some states may be redundant due to
        // coarse grained nature of some clocks, so we want to round up to
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index a4206b71d650..defbee866be6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -39,10 +39,6 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
 uint8_t dcn32_predict_pipe_split(struct dc_state *context,
                                  display_e2e_pipe_params_st *pipe_e2e);
 
-void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
-                                   unsigned int *num_entries,
-                                   struct _vcs_dpi_voltage_scaling_st *entry);
-
 void dcn32_set_phantom_stream_timing(struct dc *dc,
                                     struct dc_state *context,
                                     struct pipe_ctx *ref_pipe,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index f0683fd9d3f0..190776063f46 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -207,24 +207,20 @@ static float calculate_net_bw_in_kbytes_sec(struct 
_vcs_dpi_voltage_scaling_st *
        return limiting_bw_kbytes_sec;
 }
 
-void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st 
*table,
+static void dcn321_insert_entry_into_table_sorted(struct 
_vcs_dpi_voltage_scaling_st *table,
                                           unsigned int *num_entries,
                                           struct _vcs_dpi_voltage_scaling_st 
*entry)
 {
        int i = 0;
        int index = 0;
-       float net_bw_of_new_state = 0;
 
        dc_assert_fp_enabled();
 
-       get_optimal_ntuple(entry);
-
        if (*num_entries == 0) {
                table[0] = *entry;
                (*num_entries)++;
        } else {
-               net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
-               while (net_bw_of_new_state > 
calculate_net_bw_in_kbytes_sec(&table[index])) {
+               while (entry->net_bw_in_kbytes_sec > 
table[index].net_bw_in_kbytes_sec) {
                        index++;
                        if (index >= *num_entries)
                                break;
@@ -252,6 +248,63 @@ static void remove_entry_from_table_at_index(struct 
_vcs_dpi_voltage_scaling_st
        memset(&table[--(*num_entries)], 0, sizeof(struct 
_vcs_dpi_voltage_scaling_st));
 }
 
+static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry,
+               struct _vcs_dpi_voltage_scaling_st *second_entry)
+{
+       struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry;
+       *first_entry = *second_entry;
+       *second_entry = temp_entry;
+}
+
+/*
+ * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by 
DCFCLK
+ */
+static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st 
*table, unsigned int *num_entries)
+{
+       unsigned int start_index = 0;
+       unsigned int end_index = 0;
+       unsigned int current_bw = 0;
+
+       for (int i = 0; i < (*num_entries - 1); i++) {
+               if (table[i].net_bw_in_kbytes_sec == 
table[i+1].net_bw_in_kbytes_sec) {
+                       current_bw = table[i].net_bw_in_kbytes_sec;
+                       start_index = i;
+                       end_index = ++i;
+
+                       while ((i < (*num_entries - 1)) && 
(table[i+1].net_bw_in_kbytes_sec == current_bw))
+                               end_index = ++i;
+               }
+
+               if (start_index != end_index) {
+                       for (int j = start_index; j < end_index; j++) {
+                               for (int k = start_index; k < end_index; k++) {
+                                       if (table[k].dcfclk_mhz > 
table[k+1].dcfclk_mhz)
+                                               swap_table_entries(&table[k], 
&table[k+1]);
+                               }
+                       }
+               }
+
+               start_index = 0;
+               end_index = 0;
+
+       }
+}
+
+/*
+ * remove_inconsistent_entries - Ensure entries with the same bandwidth have 
MEMCLK and FCLK monotonically increasing
+ *                               and remove entries that do not follow this 
order
+ */
+static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st 
*table, unsigned int *num_entries)
+{
+       for (int i = 0; i < (*num_entries - 1); i++) {
+               if (table[i].net_bw_in_kbytes_sec == 
table[i+1].net_bw_in_kbytes_sec) {
+                       if ((table[i].dram_speed_mts > 
table[i+1].dram_speed_mts) ||
+                               (table[i].fabricclk_mhz > 
table[i+1].fabricclk_mhz))
+                               remove_entry_from_table_at_index(table, 
num_entries, i);
+               }
+       }
+}
+
 /*
  * override_max_clk_values - Overwrite the max clock frequencies with the max 
DC mode timings
  * Input:
@@ -383,6 +436,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                entry.fabricclk_mhz = 0;
                entry.dram_speed_mts = 0;
 
+               get_optimal_ntuple(&entry);
+               entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                dcn321_insert_entry_into_table_sorted(table, num_entries, 
&entry);
        }
 
@@ -391,6 +446,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
        entry.fabricclk_mhz = 0;
        entry.dram_speed_mts = 0;
 
+       get_optimal_ntuple(&entry);
+       entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
        dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
 
        // Insert the UCLK DPMS
@@ -399,6 +456,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                entry.fabricclk_mhz = 0;
                entry.dram_speed_mts = 
bw_params->clk_table.entries[i].memclk_mhz * 16;
 
+               get_optimal_ntuple(&entry);
+               entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                dcn321_insert_entry_into_table_sorted(table, num_entries, 
&entry);
        }
 
@@ -409,6 +468,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                        entry.fabricclk_mhz = 
bw_params->clk_table.entries[i].fclk_mhz;
                        entry.dram_speed_mts = 0;
 
+                       get_optimal_ntuple(&entry);
+                       entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                        dcn321_insert_entry_into_table_sorted(table, 
num_entries, &entry);
                }
        }
@@ -418,6 +479,8 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                entry.fabricclk_mhz = max_clk_data.fclk_mhz;
                entry.dram_speed_mts = 0;
 
+               get_optimal_ntuple(&entry);
+               entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry);
                dcn321_insert_entry_into_table_sorted(table, num_entries, 
&entry);
        }
 
@@ -433,6 +496,23 @@ static int build_synthetic_soc_states(bool 
disable_dc_mode_overwrite, struct clk
                        remove_entry_from_table_at_index(table, num_entries, i);
        }
 
+       // Insert entry with all max dc limits without bandwidth matching
+       if (!disable_dc_mode_overwrite) {
+               struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry;
+
+               max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+               max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+               max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 
16;
+
+               max_dc_limits_entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry);
+               dcn321_insert_entry_into_table_sorted(table, num_entries, 
&max_dc_limits_entry);
+
+               sort_entries_with_same_bw(table, num_entries);
+               remove_inconsistent_entries(table, num_entries);
+       }
+
+
+
        // At this point, the table only contains supported points of interest
        // it could be used as is, but some states may be redundant due to
        // coarse grained nature of some clocks, so we want to round up to
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
index e8fad9b4be69..c6623b3705ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
@@ -29,10 +29,6 @@
 
 #include "dml/display_mode_vba.h"
 
-void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st 
*table,
-                                          unsigned int *num_entries,
-                                          struct _vcs_dpi_voltage_scaling_st 
*entry);
-
 void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params 
*bw_params);
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index ff0246a9458f..fb17f8868cb4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -167,6 +167,7 @@ struct _vcs_dpi_voltage_scaling_st {
        double phyclk_mhz;
        double dppclk_mhz;
        double dtbclk_mhz;
+       float net_bw_in_kbytes_sec;
 };
 
 /**
-- 
2.40.1

Reply via email to