On Fri Feb 7 13:25:05 2025 +0530, Vedang Nagar wrote:
> Implement power scaling including a specific vpu2 and vpu3 calculation
> for clock and bus bandwidth, which depends on the hardware
> configuration, codec format, resolution and frame rate.
> 
> Signed-off-by: Vedang Nagar <quic_vna...@quicinc.com>
> Tested-by: Stefan Schmidt <stefan.schm...@linaro.org> # x1e80100 (Dell XPS 13 
> 9345)
> Reviewed-by: Stefan Schmidt <stefan.schm...@linaro.org>
> Tested-by: Neil Armstrong <neil.armstr...@linaro.org> # on SM8550-QRD
> Tested-by: Neil Armstrong <neil.armstr...@linaro.org> # on SM8550-HDK
> Signed-off-by: Dikshita Agarwal <quic_diksh...@quicinc.com>
> Signed-off-by: Hans Verkuil <hverk...@xs4all.nl>

Patch committed.

Thanks,
Hans Verkuil

 drivers/media/platform/qcom/iris/Makefile          |   1 +
 drivers/media/platform/qcom/iris/iris_buffer.c     |   3 +
 drivers/media/platform/qcom/iris/iris_instance.h   |   6 +
 .../platform/qcom/iris/iris_platform_common.h      |  23 ++++
 .../platform/qcom/iris/iris_platform_sm8550.c      |  12 ++
 drivers/media/platform/qcom/iris/iris_power.c      | 140 +++++++++++++++++++++
 drivers/media/platform/qcom/iris/iris_power.h      |  13 ++
 drivers/media/platform/qcom/iris/iris_vb2.c        |   3 +
 drivers/media/platform/qcom/iris/iris_vdec.c       |   7 ++
 drivers/media/platform/qcom/iris/iris_vpu2.c       |  27 ++++
 drivers/media/platform/qcom/iris/iris_vpu3.c       |  38 ++++++
 drivers/media/platform/qcom/iris/iris_vpu_common.h |   1 +
 12 files changed, 274 insertions(+)

---

diff --git a/drivers/media/platform/qcom/iris/Makefile 
b/drivers/media/platform/qcom/iris/Makefile
index ab16189aa9e6..ca31db847273 100644
--- a/drivers/media/platform/qcom/iris/Makefile
+++ b/drivers/media/platform/qcom/iris/Makefile
@@ -10,6 +10,7 @@ iris-objs += iris_buffer.o \
              iris_hfi_gen2_response.o \
              iris_hfi_queue.o \
              iris_platform_sm8550.o \
+             iris_power.o \
              iris_probe.o \
              iris_resources.o \
              iris_state.o \
diff --git a/drivers/media/platform/qcom/iris/iris_buffer.c 
b/drivers/media/platform/qcom/iris/iris_buffer.c
index dc096e5e95bf..e5c5a564fcb8 100644
--- a/drivers/media/platform/qcom/iris/iris_buffer.c
+++ b/drivers/media/platform/qcom/iris/iris_buffer.c
@@ -8,6 +8,7 @@
 
 #include "iris_buffer.h"
 #include "iris_instance.h"
+#include "iris_power.h"
 #include "iris_vpu_buffer.h"
 
 #define PIXELS_4K 4096
@@ -500,6 +501,8 @@ int iris_queue_deferred_buffers(struct iris_inst *inst, 
enum iris_buffer_type bu
        struct iris_buffer *buf;
        int ret;
 
+       iris_scale_power(inst);
+
        if (buf_type == BUF_INPUT) {
                v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buffer, n) {
                        buf = to_iris_buffer(&buffer->vb);
diff --git a/drivers/media/platform/qcom/iris/iris_instance.h 
b/drivers/media/platform/qcom/iris/iris_instance.h
index 89fb63644311..caa3c6507006 100644
--- a/drivers/media/platform/qcom/iris/iris_instance.h
+++ b/drivers/media/platform/qcom/iris/iris_instance.h
@@ -33,6 +33,9 @@
  * @state: instance state
  * @sub_state: instance sub state
  * @once_per_session_set: boolean to set once per session property
+ * @max_input_data_size: max size of input data
+ * @power: structure of power info
+ * @icc_data: structure of interconnect data
  * @m2m_dev:   a reference to m2m device structure
  * @m2m_ctx:   a reference to m2m context structure
  * @sequence_cap: a sequence counter for capture queue
@@ -60,6 +63,9 @@ struct iris_inst {
        enum iris_inst_state            state;
        enum iris_inst_sub_state        sub_state;
        bool                            once_per_session_set;
+       size_t                          max_input_data_size;
+       struct iris_inst_power          power;
+       struct icc_vote_data            icc_data;
        struct v4l2_m2m_dev             *m2m_dev;
        struct v4l2_m2m_ctx             *m2m_ctx;
        u32                             sequence_cap;
diff --git a/drivers/media/platform/qcom/iris/iris_platform_common.h 
b/drivers/media/platform/qcom/iris/iris_platform_common.h
index a5a7d6838d16..189dd081ad0a 100644
--- a/drivers/media/platform/qcom/iris/iris_platform_common.h
+++ b/drivers/media/platform/qcom/iris/iris_platform_common.h
@@ -20,6 +20,8 @@ struct iris_inst;
 #define CODED_FRAMES_PROGRESSIVE               0x0
 #define DEFAULT_MAX_HOST_BUF_COUNT             64
 #define DEFAULT_MAX_HOST_BURST_BUF_COUNT       256
+#define DEFAULT_FPS                            30
+
 enum stage_type {
        STAGE_1 = 1,
        STAGE_2 = 2,
@@ -67,6 +69,10 @@ struct platform_inst_caps {
        u32 min_frame_height;
        u32 max_frame_height;
        u32 max_mbpf;
+       u32 mb_cycles_vsp;
+       u32 mb_cycles_vpp;
+       u32 mb_cycles_fw;
+       u32 mb_cycles_fw_vpp;
        u32 num_comv;
 };
 
@@ -106,11 +112,26 @@ struct platform_inst_fw_cap {
                   enum platform_inst_fw_cap_type cap_id);
 };
 
+struct bw_info {
+       u32 mbs_per_sec;
+       u32 bw_ddr;
+};
+
 struct iris_core_power {
        u64 clk_freq;
        u64 icc_bw;
 };
 
+struct iris_inst_power {
+       u64 min_freq;
+       u32 icc_bw;
+};
+
+struct icc_vote_data {
+       u32 height, width;
+       u32 fps;
+};
+
 enum platform_pm_domain_type {
        IRIS_CTRL_POWER_DOMAIN,
        IRIS_HW_POWER_DOMAIN,
@@ -124,6 +145,8 @@ struct iris_platform_data {
        void (*set_preset_registers)(struct iris_core *core);
        const struct icc_info *icc_tbl;
        unsigned int icc_tbl_size;
+       const struct bw_info *bw_tbl_dec;
+       unsigned int bw_tbl_dec_size;
        const char * const *pmdomain_tbl;
        unsigned int pmdomain_tbl_size;
        const char * const *opp_pd_tbl;
diff --git a/drivers/media/platform/qcom/iris/iris_platform_sm8550.c 
b/drivers/media/platform/qcom/iris/iris_platform_sm8550.c
index 8d23978f5cee..35d278996c43 100644
--- a/drivers/media/platform/qcom/iris/iris_platform_sm8550.c
+++ b/drivers/media/platform/qcom/iris/iris_platform_sm8550.c
@@ -126,6 +126,9 @@ static struct platform_inst_caps platform_inst_cap_sm8550 = 
{
        .min_frame_height = 96,
        .max_frame_height = 8192,
        .max_mbpf = (8192 * 4352) / 256,
+       .mb_cycles_vpp = 200,
+       .mb_cycles_fw = 489583,
+       .mb_cycles_fw_vpp = 66234,
        .num_comv = 0,
 };
 
@@ -141,6 +144,13 @@ static const struct icc_info sm8550_icc_table[] = {
 
 static const char * const sm8550_clk_reset_table[] = { "bus" };
 
+static const struct bw_info sm8550_bw_table_dec[] = {
+       { ((4096 * 2160) / 256) * 60, 1608000 },
+       { ((4096 * 2160) / 256) * 30,  826000 },
+       { ((1920 * 1080) / 256) * 60,  567000 },
+       { ((1920 * 1080) / 256) * 30,  294000 },
+};
+
 static const char * const sm8550_pmdomain_table[] = { "venus", "vcodec0" };
 
 static const char * const sm8550_opp_pd_table[] = { "mxc", "mmcx" };
@@ -214,6 +224,8 @@ struct iris_platform_data sm8550_data = {
        .icc_tbl_size = ARRAY_SIZE(sm8550_icc_table),
        .clk_rst_tbl = sm8550_clk_reset_table,
        .clk_rst_tbl_size = ARRAY_SIZE(sm8550_clk_reset_table),
+       .bw_tbl_dec = sm8550_bw_table_dec,
+       .bw_tbl_dec_size = ARRAY_SIZE(sm8550_bw_table_dec),
        .pmdomain_tbl = sm8550_pmdomain_table,
        .pmdomain_tbl_size = ARRAY_SIZE(sm8550_pmdomain_table),
        .opp_pd_tbl = sm8550_opp_pd_table,
diff --git a/drivers/media/platform/qcom/iris/iris_power.c 
b/drivers/media/platform/qcom/iris/iris_power.c
new file mode 100644
index 000000000000..dbca42df0910
--- /dev/null
+++ b/drivers/media/platform/qcom/iris/iris_power.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights 
reserved.
+ */
+
+#include <linux/pm_opp.h>
+#include <linux/pm_runtime.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "iris_buffer.h"
+#include "iris_instance.h"
+#include "iris_power.h"
+#include "iris_resources.h"
+#include "iris_vpu_common.h"
+
+static u32 iris_calc_bw(struct iris_inst *inst, struct icc_vote_data *data)
+{
+       const struct bw_info *bw_tbl = NULL;
+       struct iris_core *core = inst->core;
+       u32 num_rows, i, mbs, mbps;
+       u32 icc_bw = 0;
+
+       mbs = DIV_ROUND_UP(data->height, 16) * DIV_ROUND_UP(data->width, 16);
+       mbps = mbs * data->fps;
+       if (mbps == 0)
+               goto exit;
+
+       bw_tbl = core->iris_platform_data->bw_tbl_dec;
+       num_rows = core->iris_platform_data->bw_tbl_dec_size;
+
+       for (i = 0; i < num_rows; i++) {
+               if (i != 0 && mbps > bw_tbl[i].mbs_per_sec)
+                       break;
+
+               icc_bw = bw_tbl[i].bw_ddr;
+       }
+
+exit:
+       return icc_bw;
+}
+
+static int iris_set_interconnects(struct iris_inst *inst)
+{
+       struct iris_core *core = inst->core;
+       struct iris_inst *instance;
+       u64 total_bw_ddr = 0;
+       int ret;
+
+       mutex_lock(&core->lock);
+       list_for_each_entry(instance, &core->instances, list) {
+               if (!instance->max_input_data_size)
+                       continue;
+
+               total_bw_ddr += instance->power.icc_bw;
+       }
+
+       ret = iris_set_icc_bw(core, total_bw_ddr);
+
+       mutex_unlock(&core->lock);
+
+       return ret;
+}
+
+static int iris_vote_interconnects(struct iris_inst *inst)
+{
+       struct icc_vote_data *vote_data = &inst->icc_data;
+       struct v4l2_format *inp_f = inst->fmt_src;
+
+       vote_data->width = inp_f->fmt.pix_mp.width;
+       vote_data->height = inp_f->fmt.pix_mp.height;
+       vote_data->fps = DEFAULT_FPS;
+
+       inst->power.icc_bw = iris_calc_bw(inst, vote_data);
+
+       return iris_set_interconnects(inst);
+}
+
+static int iris_set_clocks(struct iris_inst *inst)
+{
+       struct iris_core *core = inst->core;
+       struct iris_inst *instance;
+       u64 freq = 0;
+       int ret;
+
+       mutex_lock(&core->lock);
+       list_for_each_entry(instance, &core->instances, list) {
+               if (!instance->max_input_data_size)
+                       continue;
+
+               freq += instance->power.min_freq;
+       }
+
+       core->power.clk_freq = freq;
+       ret = dev_pm_opp_set_rate(core->dev, freq);
+       mutex_unlock(&core->lock);
+
+       return ret;
+}
+
+static int iris_scale_clocks(struct iris_inst *inst)
+{
+       const struct vpu_ops *vpu_ops = inst->core->iris_platform_data->vpu_ops;
+       struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
+       struct v4l2_m2m_buffer *buffer, *n;
+       struct iris_buffer *buf;
+       size_t data_size = 0;
+
+       v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buffer, n) {
+               buf = to_iris_buffer(&buffer->vb);
+               data_size = max(data_size, buf->data_size);
+       }
+
+       inst->max_input_data_size = data_size;
+       if (!inst->max_input_data_size)
+               return 0;
+
+       inst->power.min_freq = vpu_ops->calc_freq(inst, 
inst->max_input_data_size);
+
+       return iris_set_clocks(inst);
+}
+
+int iris_scale_power(struct iris_inst *inst)
+{
+       struct iris_core *core = inst->core;
+       int ret;
+
+       if (pm_runtime_suspended(core->dev)) {
+               ret = pm_runtime_resume_and_get(core->dev);
+               if (ret < 0)
+                       return ret;
+
+               pm_runtime_put_autosuspend(core->dev);
+       }
+
+       ret = iris_scale_clocks(inst);
+       if (ret)
+               return ret;
+
+       return iris_vote_interconnects(inst);
+}
diff --git a/drivers/media/platform/qcom/iris/iris_power.h 
b/drivers/media/platform/qcom/iris/iris_power.h
new file mode 100644
index 000000000000..55212660e72d
--- /dev/null
+++ b/drivers/media/platform/qcom/iris/iris_power.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights 
reserved.
+ */
+
+#ifndef __IRIS_POWER_H__
+#define __IRIS_POWER_H__
+
+struct iris_inst;
+
+int iris_scale_power(struct iris_inst *inst);
+
+#endif
diff --git a/drivers/media/platform/qcom/iris/iris_vb2.c 
b/drivers/media/platform/qcom/iris/iris_vb2.c
index 712d37723ec3..cdf11feb590b 100644
--- a/drivers/media/platform/qcom/iris/iris_vb2.c
+++ b/drivers/media/platform/qcom/iris/iris_vb2.c
@@ -10,6 +10,7 @@
 #include "iris_instance.h"
 #include "iris_vb2.h"
 #include "iris_vdec.h"
+#include "iris_power.h"
 
 static int iris_check_core_mbpf(struct iris_inst *inst)
 {
@@ -187,6 +188,8 @@ int iris_vb2_start_streaming(struct vb2_queue *q, unsigned 
int count)
                goto error;
        }
 
+       iris_scale_power(inst);
+
        ret = iris_check_session_supported(inst);
        if (ret)
                goto error;
diff --git a/drivers/media/platform/qcom/iris/iris_vdec.c 
b/drivers/media/platform/qcom/iris/iris_vdec.c
index 076e3ee7969f..4143acedfc57 100644
--- a/drivers/media/platform/qcom/iris/iris_vdec.c
+++ b/drivers/media/platform/qcom/iris/iris_vdec.c
@@ -9,6 +9,7 @@
 #include "iris_buffer.h"
 #include "iris_ctrls.h"
 #include "iris_instance.h"
+#include "iris_power.h"
 #include "iris_vdec.h"
 #include "iris_vpu_buffer.h"
 
@@ -360,6 +361,8 @@ static int iris_vdec_process_streamon_input(struct 
iris_inst *inst)
        enum iris_inst_sub_state set_sub_state = 0;
        int ret;
 
+       iris_scale_power(inst);
+
        ret = hfi_ops->session_start(inst, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
        if (ret)
                return ret;
@@ -427,6 +430,8 @@ static int iris_vdec_process_streamon_output(struct 
iris_inst *inst)
        enum iris_inst_sub_state clear_sub_state = 0;
        int ret = 0;
 
+       iris_scale_power(inst);
+
        drain_active = inst->sub_state & IRIS_INST_SUB_DRAIN &&
                inst->sub_state & IRIS_INST_SUB_DRAIN_LAST;
 
@@ -573,6 +578,8 @@ int iris_vdec_qbuf(struct iris_inst *inst, struct 
vb2_v4l2_buffer *vbuf)
                return 0;
        }
 
+       iris_scale_power(inst);
+
        return iris_queue_buffer(inst, buf);
 }
 
diff --git a/drivers/media/platform/qcom/iris/iris_vpu2.c 
b/drivers/media/platform/qcom/iris/iris_vpu2.c
index bd8427411576..8f502aed43ce 100644
--- a/drivers/media/platform/qcom/iris/iris_vpu2.c
+++ b/drivers/media/platform/qcom/iris/iris_vpu2.c
@@ -6,6 +6,33 @@
 #include "iris_instance.h"
 #include "iris_vpu_common.h"
 
+static u64 iris_vpu2_calc_freq(struct iris_inst *inst, size_t data_size)
+{
+       struct platform_inst_caps *caps = 
inst->core->iris_platform_data->inst_caps;
+       struct v4l2_format *inp_f = inst->fmt_src;
+       u32 mbs_per_second, mbpf, height, width;
+       unsigned long vpp_freq, vsp_freq;
+       u32 fps = DEFAULT_FPS;
+
+       width = max(inp_f->fmt.pix_mp.width, inst->crop.width);
+       height = max(inp_f->fmt.pix_mp.height, inst->crop.height);
+
+       mbpf = NUM_MBS_PER_FRAME(height, width);
+       mbs_per_second = mbpf * fps;
+
+       vpp_freq = mbs_per_second * caps->mb_cycles_vpp;
+
+       /* 21 / 20 is overhead factor */
+       vpp_freq += vpp_freq / 20;
+       vsp_freq = mbs_per_second * caps->mb_cycles_vsp;
+
+       /* 10 / 7 is overhead factor */
+       vsp_freq += ((fps * data_size * 8) * 10) / 7;
+
+       return max(vpp_freq, vsp_freq);
+}
+
 const struct vpu_ops iris_vpu2_ops = {
        .power_off_hw = iris_vpu_power_off_hw,
+       .calc_freq = iris_vpu2_calc_freq,
 };
diff --git a/drivers/media/platform/qcom/iris/iris_vpu3.c 
b/drivers/media/platform/qcom/iris/iris_vpu3.c
index 10599f1fa789..b484638e6105 100644
--- a/drivers/media/platform/qcom/iris/iris_vpu3.c
+++ b/drivers/media/platform/qcom/iris/iris_vpu3.c
@@ -79,6 +79,44 @@ disable_power:
        iris_vpu_power_off_hw(core);
 }
 
+static u64 iris_vpu3_calculate_frequency(struct iris_inst *inst, size_t 
data_size)
+{
+       struct platform_inst_caps *caps = 
inst->core->iris_platform_data->inst_caps;
+       struct v4l2_format *inp_f = inst->fmt_src;
+       u32 height, width, mbs_per_second, mbpf;
+       u64 fw_cycles, fw_vpp_cycles;
+       u64 vsp_cycles, vpp_cycles;
+       u32 fps = DEFAULT_FPS;
+
+       width = max(inp_f->fmt.pix_mp.width, inst->crop.width);
+       height = max(inp_f->fmt.pix_mp.height, inst->crop.height);
+
+       mbpf = NUM_MBS_PER_FRAME(height, width);
+       mbs_per_second = mbpf * fps;
+
+       fw_cycles = fps * caps->mb_cycles_fw;
+       fw_vpp_cycles = fps * caps->mb_cycles_fw_vpp;
+
+       vpp_cycles = mult_frac(mbs_per_second, caps->mb_cycles_vpp, 
(u32)inst->fw_caps[PIPE].value);
+       /* 21 / 20 is minimum overhead factor */
+       vpp_cycles += max(div_u64(vpp_cycles, 20), fw_vpp_cycles);
+
+       /* 1.059 is multi-pipe overhead */
+       if (inst->fw_caps[PIPE].value > 1)
+               vpp_cycles += div_u64(vpp_cycles * 59, 1000);
+
+       vsp_cycles = fps * data_size * 8;
+       vsp_cycles = div_u64(vsp_cycles, 2);
+       /* VSP FW overhead 1.05 */
+       vsp_cycles = div_u64(vsp_cycles * 21, 20);
+
+       if (inst->fw_caps[STAGE].value == STAGE_1)
+               vsp_cycles = vsp_cycles * 3;
+
+       return max3(vpp_cycles, vsp_cycles, fw_cycles);
+}
+
 const struct vpu_ops iris_vpu3_ops = {
        .power_off_hw = iris_vpu3_power_off_hardware,
+       .calc_freq = iris_vpu3_calculate_frequency,
 };
diff --git a/drivers/media/platform/qcom/iris/iris_vpu_common.h 
b/drivers/media/platform/qcom/iris/iris_vpu_common.h
index d3efa7c0ce9a..63fa1fa5a498 100644
--- a/drivers/media/platform/qcom/iris/iris_vpu_common.h
+++ b/drivers/media/platform/qcom/iris/iris_vpu_common.h
@@ -13,6 +13,7 @@ extern const struct vpu_ops iris_vpu3_ops;
 
 struct vpu_ops {
        void (*power_off_hw)(struct iris_core *core);
+       u64 (*calc_freq)(struct iris_inst *inst, size_t data_size);
 };
 
 int iris_vpu_boot_firmware(struct iris_core *core);

Reply via email to