[Nouveau] [PATCH v2 2/7] clk: Remove dstate
We won't need it now, because we will adjust the clocks depending on engine loads later on anyway or a static lookup table. It also simplifies the clocking logic. This code was nowhere used anyway and just a mock up. v2: fixed typo in commit message Signed-off-by: Karol HerbstReviewed-by: Martin Peres --- drm/nouveau/include/nvkm/subdev/clk.h | 2 -- drm/nouveau/nvkm/subdev/clk/base.c| 16 ++-- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drm/nouveau/include/nvkm/subdev/clk.h b/drm/nouveau/include/nvkm/subdev/clk.h index ce3bbcfe..1340f5b8 100644 --- a/drm/nouveau/include/nvkm/subdev/clk.h +++ b/drm/nouveau/include/nvkm/subdev/clk.h @@ -99,7 +99,6 @@ struct nvkm_clk { int ustate_ac; /* user-requested (-1 disabled, -2 perfmon) */ int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */ int astate; /* perfmon adjustment (base) */ - int dstate; /* display adjustment (min+) */ u8 temp; bool allow_reclock; @@ -121,7 +120,6 @@ struct nvkm_clk { int nvkm_clk_read(struct nvkm_clk *, enum nv_clk_src); int nvkm_clk_ustate(struct nvkm_clk *, int req, int pwr); int nvkm_clk_astate(struct nvkm_clk *, int req, int rel, bool wait); -int nvkm_clk_dstate(struct nvkm_clk *, int req, int rel); int nvkm_clk_tstate(struct nvkm_clk *, u8 temperature); int nvkm_clk_update(struct nvkm_clk *, bool wait); diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c index ecff3ff3..07d530ed 100644 --- a/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drm/nouveau/nvkm/subdev/clk/base.c @@ -306,15 +306,14 @@ nvkm_clk_update_work(struct work_struct *work) return; clk->pwrsrc = power_supply_is_system_supplied(); - nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C D %d\n", + nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C\n", clk->pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc, - clk->astate, clk->temp, clk->dstate); + clk->astate, clk->temp); pstate = clk->pwrsrc ? 
clk->ustate_ac : clk->ustate_dc; if (clk->state_nr && pstate != -1) { pstate = (pstate < 0) ? clk->astate : pstate; pstate = min(pstate, clk->state_nr - 1); - pstate = max(pstate, clk->dstate); } else { pstate = clk->pstate = -1; } @@ -554,16 +553,6 @@ nvkm_clk_tstate(struct nvkm_clk *clk, u8 temp) return nvkm_clk_update(clk, false); } -int -nvkm_clk_dstate(struct nvkm_clk *clk, int req, int rel) -{ - if (!rel) clk->dstate = req; - if ( rel) clk->dstate += rel; - clk->dstate = min(clk->dstate, clk->state_nr - 1); - clk->dstate = max(clk->dstate, 0); - return nvkm_clk_update(clk, true); -} - static int nvkm_clk_pwrsrc(struct nvkm_notify *notify) { @@ -621,7 +610,6 @@ nvkm_clk_init(struct nvkm_subdev *subdev) return clk->func->init(clk); clk->astate = clk->state_nr - 1; - clk->dstate = 0; clk->pstate = -1; clk->temp = 90; /* reasonable default value */ nvkm_clk_update(clk, true); -- 2.13.2 ___ Nouveau mailing list Nouveau@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v2 3/7] clk: Make pstate a pointer to nvkm_pstate
We will access the current cstate at least every second and this saves us some CPU cycles looking them up every second. v2: Rewording commit message. Signed-off-by: Karol HerbstReviewed-by: Martin Peres --- drm/nouveau/include/nvkm/subdev/clk.h | 4 +++- drm/nouveau/nvkm/engine/device/ctrl.c | 5 - drm/nouveau/nvkm/subdev/clk/base.c| 17 - drm/nouveau/nvkm/subdev/pmu/gk20a.c | 18 +++--- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drm/nouveau/include/nvkm/subdev/clk.h b/drm/nouveau/include/nvkm/subdev/clk.h index 1340f5b8..ec537e08 100644 --- a/drm/nouveau/include/nvkm/subdev/clk.h +++ b/drm/nouveau/include/nvkm/subdev/clk.h @@ -10,6 +10,8 @@ struct nvkm_pll_vals; #define NVKM_CLK_CSTATE_BASE-2 /* pstate base */ #define NVKM_CLK_CSTATE_HIGHEST -3 /* highest possible */ +#define NVKM_CLK_PSTATE_DEFAULT -1 + enum nv_clk_src { nv_clk_src_crystal, nv_clk_src_href, @@ -95,7 +97,7 @@ struct nvkm_clk { struct nvkm_notify pwrsrc_ntfy; int pwrsrc; - int pstate; /* current */ + struct nvkm_pstate *pstate; /* current */ int ustate_ac; /* user-requested (-1 disabled, -2 perfmon) */ int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */ int astate; /* perfmon adjustment (base) */ diff --git a/drm/nouveau/nvkm/engine/device/ctrl.c b/drm/nouveau/nvkm/engine/device/ctrl.c index b0ece71a..da70626c 100644 --- a/drm/nouveau/nvkm/engine/device/ctrl.c +++ b/drm/nouveau/nvkm/engine/device/ctrl.c @@ -52,7 +52,10 @@ nvkm_control_mthd_pstate_info(struct nvkm_control *ctrl, void *data, u32 size) args->v0.ustate_ac = clk->ustate_ac; args->v0.ustate_dc = clk->ustate_dc; args->v0.pwrsrc = clk->pwrsrc; - args->v0.pstate = clk->pstate; + if (clk->pstate) + args->v0.pstate = clk->pstate->pstate; + else + args->v0.pstate = NVKM_CLK_PSTATE_DEFAULT; } else { args->v0.count = 0; args->v0.ustate_ac = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c index 07d530ed..0d4d9fdf 100644 --- 
a/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drm/nouveau/nvkm/subdev/clk/base.c @@ -271,13 +271,16 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei) struct nvkm_pstate *pstate; int ret, idx = 0; + if (pstatei == NVKM_CLK_PSTATE_DEFAULT) + return 0; + list_for_each_entry(pstate, >states, head) { if (idx++ == pstatei) break; } nvkm_debug(subdev, "setting performance state %d\n", pstatei); - clk->pstate = pstatei; + clk->pstate = pstate; nvkm_pcie_set_link(pci, pstate->pcie_speed, pstate->pcie_width); @@ -306,8 +309,12 @@ nvkm_clk_update_work(struct work_struct *work) return; clk->pwrsrc = power_supply_is_system_supplied(); + if (clk->pstate) + pstate = clk->pstate->pstate; + else + pstate = NVKM_CLK_PSTATE_DEFAULT; nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C\n", - clk->pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc, + pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc, clk->astate, clk->temp); pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc; @@ -315,11 +322,11 @@ nvkm_clk_update_work(struct work_struct *work) pstate = (pstate < 0) ? 
clk->astate : pstate; pstate = min(pstate, clk->state_nr - 1); } else { - pstate = clk->pstate = -1; + pstate = NVKM_CLK_PSTATE_DEFAULT; } nvkm_trace(subdev, "-> %d\n", pstate); - if (pstate != clk->pstate) { + if (!clk->pstate || pstate != clk->pstate->pstate) { int ret = nvkm_pstate_prog(clk, pstate); if (ret) { nvkm_error(subdev, "error setting pstate %d: %d\n", @@ -610,7 +617,7 @@ nvkm_clk_init(struct nvkm_subdev *subdev) return clk->func->init(clk); clk->astate = clk->state_nr - 1; - clk->pstate = -1; + clk->pstate = NULL; clk->temp = 90; /* reasonable default value */ nvkm_clk_update(clk, true); return 0; diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c index 978aae3c..3dd550c3 100644 --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c @@ -55,24 +55,22 @@ gk20a_pmu_dvfs_target(struct gk20a_pmu *pmu, int *state) return nvkm_clk_astate(clk, *state, 0, false); } -static void -gk20a_pmu_dvfs_get_cur_state(struct gk20a_pmu *pmu, int *state) -{ - struct nvkm_clk *clk =
[Nouveau] [PATCH v2 4/7] clk: Hold information about the current cstate status
Later we will have situations where the expected and the current state isn't the same. Signed-off-by: Karol HerbstReviewed-by: Martin Peres --- drm/nouveau/include/nvkm/subdev/clk.h | 2 ++ drm/nouveau/nvkm/subdev/clk/base.c| 32 +--- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drm/nouveau/include/nvkm/subdev/clk.h b/drm/nouveau/include/nvkm/subdev/clk.h index ec537e08..f35518c3 100644 --- a/drm/nouveau/include/nvkm/subdev/clk.h +++ b/drm/nouveau/include/nvkm/subdev/clk.h @@ -101,6 +101,8 @@ struct nvkm_clk { int ustate_ac; /* user-requested (-1 disabled, -2 perfmon) */ int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */ int astate; /* perfmon adjustment (base) */ + struct nvkm_cstate *cstate; + int exp_cstateid; u8 temp; bool allow_reclock; diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c index 0d4d9fdf..d37c13b7 100644 --- a/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drm/nouveau/nvkm/subdev/clk/base.c @@ -146,9 +146,14 @@ static struct nvkm_cstate * nvkm_cstate_get(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei) { struct nvkm_cstate *cstate; - if (cstatei == NVKM_CLK_CSTATE_HIGHEST) + switch (cstatei) { + case NVKM_CLK_CSTATE_HIGHEST: return list_last_entry(>list, typeof(*cstate), head); - else { + case NVKM_CLK_CSTATE_BASE: + return >base; + case NVKM_CLK_CSTATE_DEFAULT: + return NULL; + default: list_for_each_entry(cstate, >list, head) { if (cstate->id == cstatei) return cstate; @@ -167,6 +172,9 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei) struct nvkm_cstate *cstate; int ret; + if (cstatei == NVKM_CLK_CSTATE_DEFAULT) + return 0; + if (!list_empty(>list)) { cstate = nvkm_cstate_get(clk, pstate, cstatei); cstate = nvkm_cstate_find_best(clk, pstate, cstate); @@ -193,6 +201,7 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei) ret = clk->func->calc(clk, cstate); if (ret == 0) { + clk->cstate = cstate; ret = 
clk->func->prog(clk); clk->func->tidy(clk); } @@ -295,7 +304,7 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei) ram->func->tidy(ram); } - return nvkm_cstate_prog(clk, pstate, NVKM_CLK_CSTATE_HIGHEST); + return nvkm_cstate_prog(clk, pstate, clk->exp_cstateid); } static void @@ -313,9 +322,9 @@ nvkm_clk_update_work(struct work_struct *work) pstate = clk->pstate->pstate; else pstate = NVKM_CLK_PSTATE_DEFAULT; - nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C\n", + nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d C %d T %d°C\n", pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc, - clk->astate, clk->temp); + clk->astate, clk->exp_cstateid, clk->temp); pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc; if (clk->state_nr && pstate != -1) { @@ -536,6 +545,7 @@ nvkm_clk_ustate(struct nvkm_clk *clk, int req, int pwr) if (ret >= 0) { if (ret -= 2, pwr) clk->ustate_ac = ret; else clk->ustate_dc = ret; + clk->exp_cstateid = NVKM_CLK_CSTATE_HIGHEST; return nvkm_clk_update(clk, true); } return ret; @@ -548,6 +558,7 @@ nvkm_clk_astate(struct nvkm_clk *clk, int req, int rel, bool wait) if ( rel) clk->astate += rel; clk->astate = min(clk->astate, clk->state_nr - 1); clk->astate = max(clk->astate, 0); + clk->exp_cstateid = NVKM_CLK_CSTATE_BASE; return nvkm_clk_update(clk, wait); } @@ -618,6 +629,8 @@ nvkm_clk_init(struct nvkm_subdev *subdev) clk->astate = clk->state_nr - 1; clk->pstate = NULL; + clk->exp_cstateid = NVKM_CLK_CSTATE_DEFAULT; + clk->cstate = NULL; clk->temp = 90; /* reasonable default value */ nvkm_clk_update(clk, true); return 0; @@ -701,15 +714,20 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct nvkm_device *device, if (mode) { clk->ustate_ac = nvkm_clk_nstate(clk, mode, arglen); clk->ustate_dc = nvkm_clk_nstate(clk, mode, arglen); + clk->exp_cstateid = NVKM_CLK_CSTATE_HIGHEST; } mode = nvkm_stropt(device->cfgopt, "NvClkModeAC", ); - if (mode) + if (mode) { clk->ustate_ac = nvkm_clk_nstate(clk, mode, arglen); + 
clk->exp_cstateid = NVKM_CLK_CSTATE_HIGHEST; + } mode =
[Nouveau] [PATCH v2 7/7] clk: Check pm_runtime status before reclocking
We don't want to change anything on the GPU if it's suspended. Also, we need to increase the pm_runtime refcount so that the GPU won't be suspended while reclocking. v2: convert to C-style comments Signed-off-by: Karol Herbst --- drm/nouveau/nvkm/subdev/clk/base.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c index 54188d2b..81093e13 100644 --- a/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drm/nouveau/nvkm/subdev/clk/base.c @@ -315,6 +315,7 @@ nvkm_clk_update_work(struct work_struct *work) { struct nvkm_clk *clk = container_of(work, typeof(*clk), work); struct nvkm_subdev *subdev = >subdev; + struct device *dev = subdev->device->dev; int pstate; if (!atomic_xchg(>waiting, 0)) @@ -337,8 +338,17 @@ nvkm_clk_update_work(struct work_struct *work) } nvkm_trace(subdev, "-> %d\n", pstate); - if (!clk->pstate || pstate != clk->pstate->pstate) { - int ret = nvkm_pstate_prog(clk, pstate); + + /* only call into the code if the GPU is powered on */ + if ((!clk->pstate || pstate != clk->pstate->pstate) +&& !pm_runtime_suspended(dev)) { + int ret; + /* it would be a shame if the GPU goes into suspend while doing +* the reclock +*/ + pm_runtime_get_sync(dev); + ret = nvkm_pstate_prog(clk, pstate); + pm_runtime_put(dev); if (ret) { nvkm_error(subdev, "error setting pstate %d: %d\n", pstate, ret); -- 2.13.2 ___ Nouveau mailing list Nouveau@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v2 5/7] clk: We should pass the pstate id around not the index in the list
This makes the code easier, because we can compare the id with pstate->pstate and saves us from the trouble of iterating over the pstates to match the index. v2: reword commit message Signed-off-by: Karol HerbstReviewed-by: Martin Peres --- drm/nouveau/nouveau_debugfs.c | 6 +-- drm/nouveau/nvkm/subdev/clk/base.c | 78 +++--- 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/drm/nouveau/nouveau_debugfs.c b/drm/nouveau/nouveau_debugfs.c index fd64dfdc..b114a429 100644 --- a/drm/nouveau/nouveau_debugfs.c +++ b/drm/nouveau/nouveau_debugfs.c @@ -96,11 +96,11 @@ nouveau_debugfs_pstate_get(struct seq_file *m, void *data) } while (attr.index); if (state >= 0) { - if (info.ustate_ac == state) + if (info.ustate_ac == attr.state) seq_printf(m, " AC"); - if (info.ustate_dc == state) + if (info.ustate_dc == attr.state) seq_printf(m, " DC"); - if (info.pstate == state) + if (info.pstate == attr.state) seq_printf(m, " *"); } else { if (info.ustate_ac < -1) diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c index d37c13b7..1d71bf09 100644 --- a/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drm/nouveau/nvkm/subdev/clk/base.c @@ -272,23 +272,26 @@ nvkm_cstate_new(struct nvkm_clk *clk, int idx, struct nvkm_pstate *pstate) * P-States */ static int -nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei) +nvkm_pstate_prog(struct nvkm_clk *clk, int pstateid) { struct nvkm_subdev *subdev = >subdev; struct nvkm_fb *fb = subdev->device->fb; struct nvkm_pci *pci = subdev->device->pci; struct nvkm_pstate *pstate; - int ret, idx = 0; + int ret; - if (pstatei == NVKM_CLK_PSTATE_DEFAULT) + if (pstateid == NVKM_CLK_PSTATE_DEFAULT) return 0; list_for_each_entry(pstate, >states, head) { - if (idx++ == pstatei) + if (pstate->pstate == pstateid) break; } - nvkm_debug(subdev, "setting performance state %d\n", pstatei); + if (!pstate) + return -EINVAL; + + nvkm_debug(subdev, "setting performance state %x\n", pstateid); clk->pstate = pstate; nvkm_pcie_set_link(pci, 
pstate->pcie_speed, pstate->pcie_width); @@ -329,7 +332,6 @@ nvkm_clk_update_work(struct work_struct *work) pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc; if (clk->state_nr && pstate != -1) { pstate = (pstate < 0) ? clk->astate : pstate; - pstate = min(pstate, clk->state_nr - 1); } else { pstate = NVKM_CLK_PSTATE_DEFAULT; } @@ -491,32 +493,9 @@ nvkm_pstate_new(struct nvkm_clk *clk, int idx) * Adjustment triggers */ static int -nvkm_clk_ustate_update(struct nvkm_clk *clk, int req) -{ - struct nvkm_pstate *pstate; - int i = 0; - - if (!clk->allow_reclock) - return -ENOSYS; - - if (req != -1 && req != -2) { - list_for_each_entry(pstate, >states, head) { - if (pstate->pstate == req) - break; - i++; - } - - if (pstate->pstate != req) - return -EINVAL; - req = i; - } - - return req + 2; -} - -static int nvkm_clk_nstate(struct nvkm_clk *clk, const char *mode, int arglen) { + struct nvkm_pstate *pstate; int ret = 1; if (clk->allow_reclock && !strncasecmpz(mode, "auto", arglen)) @@ -528,27 +507,46 @@ nvkm_clk_nstate(struct nvkm_clk *clk, const char *mode, int arglen) ((char *)mode)[arglen] = '\0'; if (!kstrtol(mode, 0, )) { - ret = nvkm_clk_ustate_update(clk, v); + ret = v; if (ret < 0) ret = 1; } ((char *)mode)[arglen] = save; } - return ret - 2; + if (ret < 0) + return ret; + + list_for_each_entry(pstate, >states, head) { + if (pstate->pstate == ret) + return ret; + } + return -EINVAL; } int nvkm_clk_ustate(struct nvkm_clk *clk, int req, int pwr) { - int ret = nvkm_clk_ustate_update(clk, req); - if (ret >= 0) { - if (ret -= 2, pwr) clk->ustate_ac = ret; - else clk->ustate_dc = ret; -
[Nouveau] [PATCH v2 1/7] clk: Rename nvkm_pstate_calc to nvkm_clk_update and export it
This function will be used to update the current clock state. This will happen for various reasons: * Temperature changes * User changes clocking state * Load changes v2: remove parameter name Signed-off-by: Karol Herbst--- drm/nouveau/include/nvkm/subdev/clk.h | 1 + drm/nouveau/nvkm/subdev/clk/base.c| 26 -- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drm/nouveau/include/nvkm/subdev/clk.h b/drm/nouveau/include/nvkm/subdev/clk.h index e5275f74..ce3bbcfe 100644 --- a/drm/nouveau/include/nvkm/subdev/clk.h +++ b/drm/nouveau/include/nvkm/subdev/clk.h @@ -123,6 +123,7 @@ int nvkm_clk_ustate(struct nvkm_clk *, int req, int pwr); int nvkm_clk_astate(struct nvkm_clk *, int req, int rel, bool wait); int nvkm_clk_dstate(struct nvkm_clk *, int req, int rel); int nvkm_clk_tstate(struct nvkm_clk *, u8 temperature); +int nvkm_clk_update(struct nvkm_clk *, bool wait); int nv04_clk_new(struct nvkm_device *, int, struct nvkm_clk **); int nv40_clk_new(struct nvkm_device *, int, struct nvkm_clk **); diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c index e4c8d310..ecff3ff3 100644 --- a/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drm/nouveau/nvkm/subdev/clk/base.c @@ -296,7 +296,7 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei) } static void -nvkm_pstate_work(struct work_struct *work) +nvkm_clk_update_work(struct work_struct *work) { struct nvkm_clk *clk = container_of(work, typeof(*clk), work); struct nvkm_subdev *subdev = >subdev; @@ -332,9 +332,15 @@ nvkm_pstate_work(struct work_struct *work) nvkm_notify_get(>pwrsrc_ntfy); } -static int -nvkm_pstate_calc(struct nvkm_clk *clk, bool wait) +int +nvkm_clk_update(struct nvkm_clk *clk, bool wait) { + if (!clk) + return -EINVAL; + + if (!clk->allow_reclock) + return -ENODEV; + atomic_set(>waiting, 1); schedule_work(>work); if (wait) @@ -524,7 +530,7 @@ nvkm_clk_ustate(struct nvkm_clk *clk, int req, int pwr) if (ret >= 0) { if (ret -= 2, pwr) clk->ustate_ac = ret; else 
clk->ustate_dc = ret; - return nvkm_pstate_calc(clk, true); + return nvkm_clk_update(clk, true); } return ret; } @@ -536,7 +542,7 @@ nvkm_clk_astate(struct nvkm_clk *clk, int req, int rel, bool wait) if ( rel) clk->astate += rel; clk->astate = min(clk->astate, clk->state_nr - 1); clk->astate = max(clk->astate, 0); - return nvkm_pstate_calc(clk, wait); + return nvkm_clk_update(clk, wait); } int @@ -545,7 +551,7 @@ nvkm_clk_tstate(struct nvkm_clk *clk, u8 temp) if (clk->temp == temp) return 0; clk->temp = temp; - return nvkm_pstate_calc(clk, false); + return nvkm_clk_update(clk, false); } int @@ -555,7 +561,7 @@ nvkm_clk_dstate(struct nvkm_clk *clk, int req, int rel) if ( rel) clk->dstate += rel; clk->dstate = min(clk->dstate, clk->state_nr - 1); clk->dstate = max(clk->dstate, 0); - return nvkm_pstate_calc(clk, true); + return nvkm_clk_update(clk, true); } static int @@ -563,7 +569,7 @@ nvkm_clk_pwrsrc(struct nvkm_notify *notify) { struct nvkm_clk *clk = container_of(notify, typeof(*clk), pwrsrc_ntfy); - nvkm_pstate_calc(clk, false); + nvkm_clk_update(clk, false); return NVKM_NOTIFY_DROP; } @@ -618,7 +624,7 @@ nvkm_clk_init(struct nvkm_subdev *subdev) clk->dstate = 0; clk->pstate = -1; clk->temp = 90; /* reasonable default value */ - nvkm_pstate_calc(clk, true); + nvkm_clk_update(clk, true); return 0; } @@ -675,7 +681,7 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct nvkm_device *device, clk->ustate_dc = -1; clk->allow_reclock = allow_reclock; - INIT_WORK(>work, nvkm_pstate_work); + INIT_WORK(>work, nvkm_clk_update_work); init_waitqueue_head(>wait); atomic_set(>waiting, 0); -- 2.13.2 ___ Nouveau mailing list Nouveau@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v2 6/7] clk: Set clocks to pre suspend state after suspend
The idea is to clear out the saved state, because after a resume we can't know what the GPU is clocked to. The reclock is triggered by the call to nvkm_clk_update later in nvkm_clk_init. v2: convert to C-style comments Signed-off-by: Karol Herbst Reviewed-by: Martin Peres --- drm/nouveau/nvkm/subdev/clk/base.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c index 1d71bf09..54188d2b 100644 --- a/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drm/nouveau/nvkm/subdev/clk/base.c @@ -625,11 +625,10 @@ nvkm_clk_init(struct nvkm_subdev *subdev) if (clk->func->init) return clk->func->init(clk); - clk->astate = NVKM_CLK_PSTATE_DEFAULT; + /* after a resume we have no idea what clocks are set, reset the state +*/ clk->pstate = NULL; - clk->exp_cstateid = NVKM_CLK_CSTATE_DEFAULT; clk->cstate = NULL; - clk->temp = 90; /* reasonable default value */ nvkm_clk_update(clk, true); return 0; } @@ -683,8 +682,13 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct nvkm_device *device, clk->func = func; INIT_LIST_HEAD(>states); clk->domains = func->domains; + + clk->astate = NVKM_CLK_PSTATE_DEFAULT; clk->ustate_ac = -1; clk->ustate_dc = -1; + clk->exp_cstateid = NVKM_CLK_CSTATE_DEFAULT; + clk->temp = 90; /* reasonable default value */ + clk->allow_reclock = allow_reclock; INIT_WORK(>work, nvkm_clk_update_work); -- 2.13.2 ___ Nouveau mailing list Nouveau@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH v2 0/7] clk subdev updates
This series addresses various issues inside the reclocking code: 1. after resume the set clocks are reset 2. reclocking not possible while GPU is suspended Some of the patches were part of the bigger reclocking series I sent months ago, though some things have changed. This is also preparation work for changing the clock state due to temperature changes and dynamic reclocking. v2: remove commits to support partial reclocks Karol Herbst (7): clk: Rename nvkm_pstate_calc to nvkm_clk_update and export it clk: Remove dstate clk: Make pstate a pointer to nvkm_pstate clk: Hold information about the current cstate status clk: We should pass the pstate id around not the index in the list clk: Set clocks to pre suspend state after suspend clk: Check pm_runtime status before reclocking drm/nouveau/include/nvkm/subdev/clk.h | 9 +- drm/nouveau/nouveau_debugfs.c | 6 +- drm/nouveau/nvkm/engine/device/ctrl.c | 5 +- drm/nouveau/nvkm/subdev/clk/base.c| 175 -- drm/nouveau/nvkm/subdev/pmu/gk20a.c | 18 ++-- 5 files changed, 123 insertions(+), 90 deletions(-) -- 2.13.2 ___ Nouveau mailing list Nouveau@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/nouveau
[Nouveau] [PATCH 1/2] nv110/exa: Remove depbars
Removed explicit depbar instructions as they're not used by the blob anymore. Signed-off-by: Aaryaman Vasishta --- src/shader/exac8nv110.fp | 5 ++--- src/shader/exac8nv110.fpc | 10 -- src/shader/exacanv110.fp | 5 ++--- src/shader/exacanv110.fpc | 10 -- src/shader/exacmnv110.fp | 5 ++--- src/shader/exacmnv110.fpc | 10 -- src/shader/exas8nv110.fp | 4 +--- src/shader/exas8nv110.fpc | 8 ++-- src/shader/exasanv110.fp | 5 ++--- src/shader/exasanv110.fpc | 10 -- src/shader/exascnv110.fp | 2 -- src/shader/exascnv110.fpc | 4 src/shader/videonv110.fp | 9 +++-- src/shader/videonv110.fpc | 18 ++ 14 files changed, 36 insertions(+), 69 deletions(-) diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp index ce78036..220d7e5 100644 --- a/src/shader/exac8nv110.fp +++ b/src/shader/exac8nv110.fp @@ -36,12 +36,11 @@ ipa $r3 a[0x84] $r0 0x0 0x1 sched (st 0x0) (st 0x0) (st 0x0) ipa $r2 a[0x80] $r0 0x0 0x1 tex nodep $r0 $r2 0x0 0x0 t2d 0x8 -depbar le 0x5 0x0 0x0 -sched (st 0x0) (st 0x0) (st 0x0) fmul ftz $r3 $r0 $r1 +sched (st 0x0) (st 0x0) (st 0x0) mov $r2 $r3 0xf mov $r1 $r3 0xf -sched (st 0x0) (st 0x0) (st 0x0) mov $r0 $r3 0xf +sched (st 0x0) (st 0x0) (st 0x0) exit #endif diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc index 4aa1368..d8d5517 100644 --- a/src/shader/exac8nv110.fpc +++ b/src/shader/exac8nv110.fpc @@ -20,19 +20,17 @@ 0xe043ff88, 0x2ff70200, 0xc03a0004, -0x3407, -0xf0f0, -0xfc0007e0, -0x001f8000, 0x00170003, 0x5c681000, +0xfc0007e0, +0x001f8000, 0x00370002, 0x5c980780, 0x00370001, 0x5c980780, -0xfc0007e0, -0x001f8000, 0x0037, 0x5c980780, +0xfc0007e0, +0x001f8000, 0x0007000f, 0xe300, diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp index a70d5c5..bad34c9 100644 --- a/src/shader/exacanv110.fp +++ b/src/shader/exacanv110.fp @@ -36,12 +36,11 @@ ipa $r1 a[0x84] $r0 0x0 0x1 sched (st 0x0) (st 0x0) (st 0x0) ipa $r0 a[0x80] $r0 0x0 0x1 tex nodep $r0 $r0 0x0 0x0 t2d 0xf -depbar le 0x5 0x0 0x0 -sched (st 0x0) (st 0x0) (st 0x0) fmul ftz 
$r3 $r3 $r7 +sched (st 0x0) (st 0x0) (st 0x0) fmul ftz $r2 $r2 $r6 fmul ftz $r1 $r1 $r5 -sched (st 0x0) (st 0x0) (st 0x0) fmul ftz $r0 $r0 $r4 +sched (st 0x0) (st 0x0) (st 0x0) exit #endif diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc index 7c0ca5e..1a151a2 100644 --- a/src/shader/exacanv110.fpc +++ b/src/shader/exacanv110.fpc @@ -20,19 +20,17 @@ 0xe043ff88, 0xaff7, 0xc03a0007, -0x3407, -0xf0f0, -0xfc0007e0, -0x001f8000, 0x00770303, 0x5c681000, +0xfc0007e0, +0x001f8000, 0x00670202, 0x5c681000, 0x00570101, 0x5c681000, -0xfc0007e0, -0x001f8000, 0x0047, 0x5c681000, +0xfc0007e0, +0x001f8000, 0x0007000f, 0xe300, diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp index fe5c294..c1eb38e 100644 --- a/src/shader/exacmnv110.fp +++ b/src/shader/exacmnv110.fp @@ -36,12 +36,11 @@ ipa $r1 a[0x84] $r0 0x0 0x1 sched (st 0x0) (st 0x0) (st 0x0) ipa $r0 a[0x80] $r0 0x0 0x1 tex nodep $r0 $r0 0x0 0x0 t2d 0xf -depbar le 0x5 0x0 0x0 -sched (st 0x0) (st 0x0) (st 0x0) fmul ftz $r3 $r3 $r4 +sched (st 0x0) (st 0x0) (st 0x0) fmul ftz $r2 $r2 $r4 fmul ftz $r1 $r1 $r4 -sched (st 0x0) (st 0x0) (st 0x0) fmul ftz $r0 $r0 $r4 +sched (st 0x0) (st 0x0) (st 0x0) exit #endif diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc index 9d62c1a..0b21a0a 100644 --- a/src/shader/exacmnv110.fpc +++ b/src/shader/exacmnv110.fpc @@ -20,19 +20,17 @@ 0xe043ff88, 0xaff7, 0xc03a0007, -0x3407, -0xf0f0, -0xfc0007e0, -0x001f8000, 0x00470303, 0x5c681000, +0xfc0007e0, +0x001f8000, 0x00470202, 0x5c681000, 0x00470101, 0x5c681000, -0xfc0007e0, -0x001f8000, 0x0047, 0x5c681000, +0xfc0007e0, +0x001f8000, 0x0007000f, 0xe300, diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp index 4fe2e19..c5349d9 100644 --- a/src/shader/exas8nv110.fp +++ b/src/shader/exas8nv110.fp @@ -32,11 +32,9 @@ ipa $r1 a[0x84] $r0 0x0 0x1 sched (st 0x0) (st 0x0) (st 0x0) ipa $r0 a[0x80] $r0 0x0 0x1 tex nodep $r0 $r0 0x0 0x0 t2d 0x8 -depbar le 0x5 0x0 0x0 -sched (st 0x0) (st 0x0) (st 0x0) mov 
$r3 $r0 0xf +sched (st 0x0) (st 0x0) (st 0x0) mov $r2 $r0 0xf mov $r1 $r0 0xf -sched (st 0x0) (st 0x0) (st 0x0) exit #endif diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc index 1181c41..fabf980 100644 --- a/src/shader/exas8nv110.fpc +++ b/src/shader/exas8nv110.fpc @@ -12,17 +12,13 @@ 0xe043ff88, 0x2ff7, 0xc03a0004, -0x3407, -0xf0f0, -0xfc0007e0, -0x001f8000, 0x00070003, 0x5c980780, +0xfc0007e0, +0x001f8000, 0x00070002, 0x5c980780, 0x00070001, 0x5c980780, -0xfc0007e0, -0x001f8000, 0x0007000f, 0xe300, diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp index 61374a6..4392c1f 100644 --- a/src/shader/exasanv110.fp +++ b/src/shader/exasanv110.fp @@ -36,12 +36,11 @@ ipa $r1 a[0x94] $r0
[Nouveau] [PATCH v5 2/2] nv110/exa: update sched codes
v5: Rebased on depbar removal patch; removed a redundant read dep-bar. This patch adds proper delays to maxwell exa shaders. rendercheck tests seem consistent with/without this patch. I haven't extensively tested them though. Trello: https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays Signed-off-by: Aaryaman Vasishta--- src/shader/exac8nv110.fp | 10 +- src/shader/exac8nv110.fpc | 18 +- src/shader/exacanv110.fp | 10 +- src/shader/exacanv110.fpc | 18 +- src/shader/exacmnv110.fp | 10 +- src/shader/exacmnv110.fpc | 18 +- src/shader/exas8nv110.fp | 6 +++--- src/shader/exas8nv110.fpc | 12 ++-- src/shader/exasanv110.fp | 10 +- src/shader/exasanv110.fpc | 18 +- src/shader/exascnv110.fp | 4 ++-- src/shader/exascnv110.fpc | 8 src/shader/videonv110.fp | 12 ++-- src/shader/videonv110.fpc | 22 +++--- 14 files changed, 88 insertions(+), 88 deletions(-) diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp index 220d7e5..7797ef4 100644 --- a/src/shader/exac8nv110.fp +++ b/src/shader/exac8nv110.fp @@ -25,22 +25,22 @@ NV110FP_Composite_A8[] = { }; #else -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) ipa pass $r0 a[0x7c] 0x0 0x0 0x1 mufu rcp $r0 $r0 ipa $r3 a[0x94] $r0 0x0 0x1 -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1) ipa $r2 a[0x90] $r0 0x0 0x1 tex nodep $r1 $r2 0x0 0x1 t2d 0x8 ipa $r3 a[0x84] $r0 0x0 0x1 -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0x6 wt 0x1) ipa $r2 a[0x80] $r0 0x0 0x1 tex nodep $r0 $r2 0x0 0x0 t2d 0x8 fmul ftz $r3 $r0 $r1 -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0x1) (st 0x1) (st 0x1) mov $r2 $r3 0xf mov $r1 $r3 0xf mov $r0 $r3 0xf -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf) (st 0x0) (st 0x0) exit #endif diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc index d8d5517..7eb9c02 100644 --- a/src/shader/exac8nv110.fpc +++ b/src/shader/exac8nv110.fpc 
@@ -1,36 +1,36 @@ -0xfc0007e0, -0x001f8000, +0xe1a0070f, +0x003c3c01, 0xcff7ff00, 0xe003ff87, 0x0047, 0x5080, 0x4007ff03, 0xe043ff89, -0xfc0007e0, -0x001f8000, +0xe1e0072f, +0x003c3c03, 0x0007ff02, 0xe043ff89, 0x2ff70201, 0xc03a0014, 0x4007ff03, 0xe043ff88, -0xfc0007e0, -0x001f8000, +0xe1e0072f, +0x003f9803, 0x0007ff02, 0xe043ff88, 0x2ff70200, 0xc03a0004, 0x00170003, 0x5c681000, -0xfc0007e0, -0x001f8000, +0xfc2007e1, +0x001f8400, 0x00370002, 0x5c980780, 0x00370001, 0x5c980780, 0x0037, 0x5c980780, -0xfc0007e0, +0xfc0007ef, 0x001f8000, 0x0007000f, 0xe300, diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp index bad34c9..912f630 100644 --- a/src/shader/exacanv110.fp +++ b/src/shader/exacanv110.fp @@ -25,22 +25,22 @@ NV110FP_CAComposite[] = { }; #else -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) ipa pass $r0 a[0x7c] 0x0 0x0 0x1 mufu rcp $r0 $r0 ipa $r3 a[0x94] $r0 0x0 0x1 -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1) ipa $r2 a[0x90] $r0 0x0 0x1 tex nodep $r4 $r2 0x0 0x1 t2d 0xf ipa $r1 a[0x84] $r0 0x0 0x1 -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0x1 wt 0x3) ipa $r0 a[0x80] $r0 0x0 0x1 tex nodep $r0 $r0 0x0 0x0 t2d 0xf fmul ftz $r3 $r3 $r7 -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0x1) (st 0x1) (st 0x1) fmul ftz $r2 $r2 $r6 fmul ftz $r1 $r1 $r5 fmul ftz $r0 $r0 $r4 -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf) (st 0x0) (st 0x0) exit #endif diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc index 1a151a2..4c35845 100644 --- a/src/shader/exacanv110.fpc +++ b/src/shader/exacanv110.fpc @@ -1,36 +1,36 @@ -0xfc0007e0, -0x001f8000, +0xe1a0070f, +0x003c3c01, 0xcff7ff00, 0xe003ff87, 0x0047, 0x5080, 0x4007ff03, 0xe043ff89, -0xfc0007e0, -0x001f8000, +0xe1e0072f, +0x001cbc03, 0x0007ff02, 0xe043ff89, 0xaff70204, 0xc03a0017, 0x4007ff01, 0xe043ff88, -0xfc0007e0, -0x001f8000, +0xe5e0172f, 
+0x007f8402, 0x0007ff00, 0xe043ff88, 0xaff7, 0xc03a0007, 0x00770303, 0x5c681000, -0xfc0007e0, -0x001f8000, +0xfc2007e1, +0x001f8400, 0x00670202, 0x5c681000, 0x00570101, 0x5c681000, 0x0047, 0x5c681000, -0xfc0007e0, +0xfc0007ef, 0x001f8000, 0x0007000f, 0xe300, diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp index c1eb38e..02a0835 100644 --- a/src/shader/exacmnv110.fp +++ b/src/shader/exacmnv110.fp @@ -25,22 +25,22 @@ NV110FP_Composite[] = { }; #else -sched (st 0x0) (st 0x0) (st 0x0) +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) ipa pass $r0 a[0x7c] 0x0 0x0 0x1 mufu rcp $r0 $r0 ipa $r3 a[0x94] $r0 0x0 0x1 -sched (st 0x0) (st 0x0) (st
Re: [Nouveau] [PATCH v4] nv110/exa: update sched codes
Hi, On Fri, Jun 30, 2017 at 6:26 AM, Samuel Pitoiset wrote: > Do you still have some glitches or does it work correctly now? No visible glitches on my machine so far (Pascal 1080, Debian stretch GNOME desktop). I used "rendercheck -f a8r8g8b8" to make sure there are no differences between the test results after the patch. They've helped me in my debugging to an extent. > Did you also remove the spurious wait dep bars between v3 and v4? There's a redundant read dep-bar on "sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)" which I've removed in v5 of this patch. I'll be sending them to the ML now. Cheers, Aaryaman > > On 06/27/2017 05:16 PM, Aaryaman Vasishta wrote: > >> v4: Updated the wait dependancy bars based on tex component masks. >> >> This patch adds proper delays to maxwell exa shaders. Tested with >> rendercheck -f a8r8g8b8. >> >> I am still wondering whether the rd's are required. We could >> still wait on the write bars instead. eg. see >> "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in >> exacmnv110.fp >> >> Trello: >> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-wit >> h-proper-delays >> >> Signed-off-by: Aaryaman Vasishta >> --- >> src/shader/exac8nv110.fp | 10 +- >> src/shader/exac8nv110.fpc | 18 +- >> src/shader/exacanv110.fp | 10 +- >> src/shader/exacanv110.fpc | 18 +- >> src/shader/exacmnv110.fp | 10 +- >> src/shader/exacmnv110.fpc | 18 +- >> src/shader/exas8nv110.fp | 6 +++--- >> src/shader/exas8nv110.fpc | 12 ++-- >> src/shader/exasanv110.fp | 10 +- >> src/shader/exasanv110.fpc | 18 +- >> src/shader/exascnv110.fp | 6 +++--- >> src/shader/exascnv110.fpc | 10 +- >> src/shader/videonv110.fp | 14 +++--- >> src/shader/videonv110.fpc | 26 +- >> 14 files changed, 93 insertions(+), 93 deletions(-) >> >> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp >> index ce78036..101b67f 100644 >> --- a/src/shader/exac8nv110.fp >> +++ b/src/shader/exac8nv110.fp >> @@ -25,23 +25,23 @@ 
NV110FP_Composite_A8[] = { >> }; >> #else >> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) >> ipa pass $r0 a[0x7c] 0x0 0x0 0x1 >> mufu rcp $r0 $r0 >> ipa $r3 a[0x94] $r0 0x0 0x1 >> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt >> 0x2) >> ipa $r2 a[0x90] $r0 0x0 0x1 >> tex nodep $r1 $r2 0x0 0x1 t2d 0x8 >> ipa $r3 a[0x84] $r0 0x0 0x1 >> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf) >> ipa $r2 a[0x80] $r0 0x0 0x1 >> tex nodep $r0 $r2 0x0 0x0 t2d 0x8 >> depbar le 0x5 0x0 0x0 >> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1) >> fmul ftz $r3 $r0 $r1 >> mov $r2 $r3 0xf >> mov $r1 $r3 0xf >> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0x1) (st 0xf) (st 0x0) >> mov $r0 $r3 0xf >> exit >> #endif >> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc >> index 4aa1368..1f7d649 100644 >> --- a/src/shader/exac8nv110.fpc >> +++ b/src/shader/exac8nv110.fpc >> @@ -1,36 +1,36 @@ >> -0xfc0007e0, >> -0x001f8000, >> +0xe1a0070f, >> +0x003c3c01, >> 0xcff7ff00, >> 0xe003ff87, >> 0x0047, >> 0x5080, >> 0x4007ff03, >> 0xe043ff89, >> -0xfc0007e0, >> -0x001f8000, >> +0x21e0072f, >> +0x005cbc03, >> 0x0007ff02, >> 0xe043ff89, >> 0x2ff70201, >> 0xc03a0014, >> 0x4007ff03, >> 0xe043ff88, >> -0xfc0007e0, >> -0x001f8000, >> +0xe5e0074f, >> +0x001fbc06, >> 0x0007ff02, >> 0xe043ff88, >> 0x2ff70200, >> 0xc03a0004, >> 0x3407, >> 0xf0f0, >> -0xfc0007e0, >> -0x001f8000, >> +0xfc201fe6, >> +0x001f8400, >> 0x00170003, >> 0x5c681000, >> 0x00370002, >> 0x5c980780, >> 0x00370001, >> 0x5c980780, >> -0xfc0007e0, >> +0xfde007e1, >> 0x001f8000, >> 0x0037, >> 0x5c980780, >> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp >> index a70d5c5..fe55fcd 100644 >> --- a/src/shader/exacanv110.fp >> +++ b/src/shader/exacanv110.fp >> @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = { >> }; >> #else 
>> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) >> ipa pass $r0 a[0x7c] 0x0 0x0 0x1 >> mufu rcp $r0 $r0 >> ipa $r3 a[0x94] $r0 0x0 0x1 >> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1) >> ipa $r2 a[0x90] $r0 0x0 0x1 >> tex nodep $r4 $r2 0x0 0x1 t2d 0xf >> ipa $r1 a[0x84] $r0 0x0 0x1 >> -sched (st 0x0) (st 0x0) (st 0x0) >> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf) >> ipa $r0 a[0x80] $r0 0x0 0x1 >> tex nodep $r0 $r0 0x0 0x0 t2d 0xf >> depbar le 0x5 0x0 0x0 >> -sched (st 0x0) (st 0x0) (st 0x0) >>